triton-model-analyzer 1.48.0__py3-none-any.whl → 1.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +10 -14
  2. model_analyzer/config/generate/model_profile_spec.py +76 -19
  3. model_analyzer/config/generate/perf_analyzer_config_generator.py +14 -22
  4. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +2 -16
  5. model_analyzer/config/generate/quick_run_config_generator.py +86 -35
  6. model_analyzer/config/generate/run_config_generator_factory.py +195 -31
  7. model_analyzer/config/input/config_command.py +81 -20
  8. model_analyzer/config/input/config_command_profile.py +41 -14
  9. model_analyzer/config/input/config_defaults.py +3 -15
  10. model_analyzer/perf_analyzer/perf_analyzer.py +4 -16
  11. model_analyzer/perf_analyzer/perf_config.py +23 -15
  12. model_analyzer/plots/detailed_plot.py +41 -54
  13. model_analyzer/record/metrics_manager.py +7 -15
  14. model_analyzer/record/types/gpu_free_memory.py +13 -14
  15. model_analyzer/record/types/gpu_total_memory.py +13 -14
  16. model_analyzer/record/types/gpu_used_memory.py +13 -14
  17. model_analyzer/result/parameter_search.py +10 -17
  18. model_analyzer/result/run_config_measurement.py +29 -24
  19. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/METADATA +1 -1
  20. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/RECORD +24 -24
  21. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/WHEEL +1 -1
  22. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/entry_points.txt +0 -0
  23. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/licenses/LICENSE +0 -0
  24. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/top_level.txt +0 -0

model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py

@@ -1,18 +1,6 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  import logging
  from copy import deepcopy
@@ -132,9 +120,11 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)
              for result in top_results:
                  run_config = deepcopy(result.run_config())
                  model_parameters = self._get_model_parameters(model_name)
+                 perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name)
                  parameter_search = ParameterSearch(
                      config=self._config,
                      model_parameters=model_parameters,
+                     perf_analyzer_flags=perf_analyzer_flags,
                      skip_parameter_sweep=True,
                  )
                  for parameter in parameter_search.search_parameters():
@@ -151,6 +141,12 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)

          return {}

+     def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict:
+         for model in self._models:
+             if model_name == model.model_name():
+                 return model.perf_analyzer_flags()
+         return {}
+
      def _set_parameter(
          self, run_config: RunConfig, model_parameters: Dict, parameter: int
      ) -> RunConfig:
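
The new `_get_model_perf_analyzer_flags` helper mirrors the existing `_get_model_parameters` lookup-by-name pattern, so each model's perf_analyzer_flags can be forwarded into `ParameterSearch` (which also changed in this release, file 17). A minimal runnable sketch of that pattern, using a hypothetical `FakeModel` stand-in for the real config model objects:

```python
from typing import Dict, List


class FakeModel:
    """Hypothetical stand-in for a configured model spec."""

    def __init__(self, name: str, flags: Dict):
        self._name = name
        self._flags = flags

    def model_name(self) -> str:
        return self._name

    def perf_analyzer_flags(self) -> Dict:
        return self._flags


def get_model_perf_analyzer_flags(models: List[FakeModel], model_name: str) -> Dict:
    # Same lookup-by-name shape as the new helper in the hunk above
    for model in models:
        if model_name == model.model_name():
            return model.perf_analyzer_flags()
    return {}


models = [FakeModel("resnet50", {"request-intervals": "intervals.json"})]
assert get_model_perf_analyzer_flags(models, "resnet50") == {
    "request-intervals": "intervals.json"
}
assert get_model_perf_analyzer_flags(models, "unknown") == {}
```
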

model_analyzer/config/generate/model_profile_spec.py

@@ -1,27 +1,16 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  from copy import deepcopy
- from typing import List
+ from typing import List, Optional

  from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
  from model_analyzer.config.input.objects.config_model_profile_spec import (
      ConfigModelProfileSpec,
  )
  from model_analyzer.device.gpu_device import GPUDevice
+ from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
  from model_analyzer.triton.client.client import TritonClient
  from model_analyzer.triton.model.model_config import ModelConfig
@@ -38,15 +27,72 @@ class ModelProfileSpec(ConfigModelProfileSpec):
          client: TritonClient,
          gpus: List[GPUDevice],
      ):
-         super().__init__(spec.model_name())
-         self.__dict__ = deepcopy(spec.__dict__)
+         # Determine cpu_only based on priority (before copying spec's __dict__):
+         # 1) User-specified kind in instance_group (highest)
+         # 2) cpu_only_composing_models config
+         # 3) Inherited from spec (default False)
+         explicit_kind = self._get_explicit_instance_kind(spec)
+         if explicit_kind == "KIND_CPU":
+             cpu_only = True
+         elif explicit_kind == "KIND_GPU":
+             cpu_only = False
+         elif spec.model_name() in config.cpu_only_composing_models:
+             cpu_only = True
+         else:
+             cpu_only = spec.cpu_only()
+
+         super().__init__(spec.model_name(), cpu_only=cpu_only)
+
+         # Copy remaining attributes from spec (excluding _cpu_only which we set above)
+         spec_dict = deepcopy(spec.__dict__)
+         spec_dict.pop("_cpu_only", None)
+         self.__dict__.update(spec_dict)

          self._default_model_config = ModelConfig.create_model_config_dict(
              config, client, gpus, config.model_repository, spec.model_name()
          )

-         if spec.model_name() in config.cpu_only_composing_models:
-             self._cpu_only = True
+     @staticmethod
+     def _get_explicit_instance_kind(spec: ConfigModelProfileSpec) -> Optional[str]:
+         """
+         Check if the spec has an explicit kind specified in instance_group.
+
+         Returns the kind if explicitly specified, None otherwise.
+         This allows users to specify KIND_CPU or KIND_GPU directly in
+         model_config_parameters.instance_group instead of using the
+         separate cpu_only_composing_models config option.
+
+         The config parser may wrap values in lists for sweep support, so we need
+         to handle structures like:
+         - [[{'count': [1, 2, 4], 'kind': ['KIND_CPU']}]] (double-wrapped, kind is list)
+         - [{'count': [1, 2, 4], 'kind': 'KIND_CPU'}] (single-wrapped, kind is string)
+         """
+         model_config_params = spec.model_config_parameters()
+         if model_config_params is None:
+             return None
+
+         instance_group = model_config_params.get("instance_group")
+         if instance_group is None or not isinstance(instance_group, list):
+             return None
+
+         # instance_group structure can be doubly wrapped due to config parsing:
+         # [[ {'kind': ['KIND_GPU'], 'count': [1, 2, 4]} ]]
+         # Unwrap the nested structure if needed
+         if len(instance_group) > 0 and isinstance(instance_group[0], list):
+             instance_group = instance_group[0]
+
+         # instance_group is now a list of dicts, each potentially containing 'kind'
+         for ig in instance_group:
+             if isinstance(ig, dict):
+                 kind = ig.get("kind")
+                 # Handle case where kind is wrapped in a list by config parser
+                 # e.g., ['KIND_CPU'] instead of 'KIND_CPU'
+                 if isinstance(kind, list) and len(kind) > 0:
+                     kind = kind[0]
+                 if kind in ("KIND_CPU", "KIND_GPU"):
+                     return kind
+
+         return None

      def get_default_config(self) -> dict:
          """Returns the default configuration for this model"""
@@ -72,3 +118,14 @@ class ModelProfileSpec(ConfigModelProfileSpec):
      def is_ensemble(self) -> bool:
          """Returns true if the model is an ensemble"""
          return "ensemble_scheduling" in self._default_model_config
+
+     def is_load_specified(self) -> bool:
+         """
+         Returns true if the model's PA config has specified any of the
+         inference load args (such as concurrency). Else returns false
+         """
+         load_args = PerfAnalyzerConfig.get_inference_load_args()
+         pa_flags = self.perf_analyzer_flags()
+         if pa_flags is None:
+             return False
+         return any(e in pa_flags for e in load_args)
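
`is_load_specified` relies on the new `PerfAnalyzerConfig.get_inference_load_args()` added in the perf_config.py change (file 11). The exact list it returns is not shown in this diff; the sketch below assumes it covers the three loads named elsewhere in this diff (concurrency-range, request-rate-range, request-intervals):

```python
from typing import Dict, List, Optional

# Assumed return value of PerfAnalyzerConfig.get_inference_load_args();
# the real list lives in perf_config.py and is not visible in this diff.
INFERENCE_LOAD_ARGS: List[str] = [
    "concurrency-range",
    "request-rate-range",
    "request-intervals",
]


def is_load_specified(pa_flags: Optional[Dict]) -> bool:
    # Same shape as ModelProfileSpec.is_load_specified above
    if pa_flags is None:
        return False
    return any(arg in pa_flags for arg in INFERENCE_LOAD_ARGS)


assert is_load_specified({"concurrency-range": "16:64:2"}) is True
assert is_load_specified({"percentile": "99"}) is False
assert is_load_specified(None) is False
```
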

model_analyzer/config/generate/perf_analyzer_config_generator.py

@@ -1,18 +1,6 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  import logging
  from typing import Generator, List, Optional
@@ -169,10 +157,12 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
          self._parameter_results.extend(measurement)

      def _create_parameter_list(self) -> List[int]:
-         # The two possible parameters are request rate or concurrency
-         # Concurrency is the default and will be used unless the user specifies
-         # request rate, either as a model parameter or a config option
-         if self._cli_config.is_request_rate_specified(self._model_parameters):
+         # Determines the inference load (concurrency or request-rate or request-intervals)
+         # and creates the list of values to use. If nothing is specified by the user, then
+         # concurrency will be used.
+         if "request-intervals" in self._perf_analyzer_flags:
+             return [self._perf_analyzer_flags["request-intervals"]]
+         elif self._cli_config.is_request_rate_specified(self._model_parameters):
              return self._create_request_rate_list()
          else:
              return self._create_concurrency_list()
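
The new precedence order is: an explicit request-intervals flag wins, then request-rate (model parameter or config option), then the concurrency default. A condensed sketch of the branch, with hypothetical stand-in lists for the two sweep builders:

```python
from typing import Dict, List


def create_parameter_list(
    perf_analyzer_flags: Dict, request_rate_specified: bool
) -> List:
    # Mirrors _create_parameter_list's precedence order above
    if "request-intervals" in perf_analyzer_flags:
        # A single "parameter": the intervals file drives the load directly
        return [perf_analyzer_flags["request-intervals"]]
    elif request_rate_specified:
        return [16, 32, 64]  # stand-in for _create_request_rate_list()
    else:
        return [1, 2, 4, 8]  # stand-in for _create_concurrency_list()


assert create_parameter_list({"request-intervals": "i.json"}, True) == ["i.json"]
assert create_parameter_list({}, True) == [16, 32, 64]
assert create_parameter_list({}, False) == [1, 2, 4, 8]
```
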
@@ -207,7 +197,7 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
          for params in utils.generate_parameter_combinations(
              perf_config_non_parameter_values
          ):
-             configs_with_concurrency = []
+             configs_with_inference_load = []
              for parameter in self._parameters:
                  new_perf_config = PerfAnalyzerConfig()

@@ -217,7 +207,9 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):

                  new_perf_config.update_config(params)

-                 if self._cli_config.is_request_rate_specified(self._model_parameters):
+                 if "request-intervals" in self._perf_analyzer_flags:
+                     pass
+                 elif self._cli_config.is_request_rate_specified(self._model_parameters):
                      new_perf_config.update_config({"request-rate-range": parameter})
                  else:
                      new_perf_config.update_config({"concurrency-range": parameter})
@@ -225,8 +217,8 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
                  # User provided flags can override the search parameters
                  new_perf_config.update_config(self._perf_analyzer_flags)

-                 configs_with_concurrency.append(new_perf_config)
-             self._configs.append(configs_with_concurrency)
+                 configs_with_inference_load.append(new_perf_config)
+             self._configs.append(configs_with_inference_load)

      def _create_non_parameter_perf_config_values(self) -> dict:
          perf_config_values = {

model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py

@@ -1,21 +1,8 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  import logging
- from copy import deepcopy
  from typing import Generator, List, Optional

  from model_analyzer.config.generate.concurrency_sweeper import ConcurrencySweeper
@@ -30,7 +17,6 @@ from model_analyzer.config.generate.search_config import SearchConfig
  from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
  from model_analyzer.config.run.run_config import RunConfig
  from model_analyzer.constants import LOGGER_NAME
- from model_analyzer.result.parameter_search import ParameterSearch
  from model_analyzer.result.result_manager import ResultManager
  from model_analyzer.result.run_config_measurement import RunConfigMeasurement


model_analyzer/config/generate/quick_run_config_generator.py

@@ -1,18 +1,6 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  import logging
  from sys import maxsize
@@ -37,6 +25,7 @@ from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
  from model_analyzer.config.run.model_run_config import ModelRunConfig
  from model_analyzer.config.run.run_config import RunConfig
  from model_analyzer.constants import LOGGER_NAME
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
  from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
  from model_analyzer.result.run_config_measurement import RunConfigMeasurement
  from model_analyzer.triton.model.model_config import ModelConfig
@@ -437,33 +426,53 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
          )

          model_config_params = deepcopy(model.model_config_parameters())
+
+         # Extract user-specified instance_group kind before removing it
+         instance_kind = self._extract_instance_group_kind(model_config_params)
+         if not instance_kind:
+             # Fallback to cpu_only flag
+             instance_kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
+
          if model_config_params:
+             # Remove parameters that are controlled by search dimensions
              model_config_params.pop("max_batch_size", None)
+             model_config_params.pop("instance_group", None)

-             # This is guaranteed to only generate one combination (check is in config_command)
+             # Generate combinations from remaining parameters
+             # For composing models, this may include dynamic_batching settings, etc.
              param_combos = GeneratorUtils.generate_combinations(model_config_params)
-             assert len(param_combos) == 1

-             param_combo = param_combos[0]
+             # Top-level models must have exactly 1 combination (validated earlier)
+             # Composing models can have 1 combination (non-searchable params are fixed)
+             if len(param_combos) > 1:
+                 raise TritonModelAnalyzerException(
+                     f"Model {model.model_name()} has multiple parameter combinations "
+                     f"after removing searchable parameters. This should have been caught "
+                     f"during config validation."
+                 )
+
+             param_combo = param_combos[0] if param_combos else {}
          else:
              param_combo = {}

-         kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
+         # Add instance_group with count from dimension and kind from config
          instance_count = self._calculate_instance_count(dimension_values)
-
          param_combo["instance_group"] = [
              {
                  "count": instance_count,
-                 "kind": kind,
+                 "kind": instance_kind,
              }
          ]

+         # Add max_batch_size from dimension if applicable
          if "max_batch_size" in dimension_values:
              param_combo["max_batch_size"] = self._calculate_model_batch_size(
                  dimension_values
              )

-         if model.supports_dynamic_batching():
+         # Add default dynamic_batching if model supports it and not already specified
+         # Preserves user-specified dynamic_batching settings (single combinations only)
+         if model.supports_dynamic_batching() and "dynamic_batching" not in param_combo:
              param_combo["dynamic_batching"] = {}

          model_config_variant = BaseModelConfigGenerator.make_model_config_variant(
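
Putting the pieces together: for a model configured with an instance_group kind of KIND_CPU plus a user dynamic_batching setting, and a search dimension choosing instance count 2 and max_batch_size 8, the resulting param_combo would look roughly like the sketch below. All concrete values are hypothetical, and the unwrapping of the single-element sweep list by `GeneratorUtils.generate_combinations` is an assumption about behavior not shown in this diff:

```python
# Hypothetical inputs: what the user put in model_config_parameters ...
model_config_params = {
    "instance_group": [[{"kind": ["KIND_CPU"]}]],
    "dynamic_batching": {"max_queue_delay_microseconds": [100]},
}
# ... and what the current search dimensions selected
instance_count = 2
max_batch_size = 8

# Approximate result after _extract_instance_group_kind(), the pops, and the
# dimension-driven additions in the hunk above:
param_combo = {
    "dynamic_batching": {"max_queue_delay_microseconds": 100},  # user value kept
    "instance_group": [
        {
            "count": instance_count,  # from the search dimension
            "kind": "KIND_CPU",  # extracted from the user's instance_group
        }
    ],
    "max_batch_size": max_batch_size,  # from the search dimension
}
```
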
@@ -475,6 +484,48 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
          )

          return model_config_variant

+     def _extract_instance_group_kind(self, model_config_params: dict) -> str:
+         """
+         Extract the 'kind' field from instance_group in model_config_parameters.
+
+         The config parser may wrap values in lists for sweep support, so we need
+         to handle structures like:
+         - [[{'count': [1, 2, 4], 'kind': ['KIND_CPU']}]] (double-wrapped, kind is list)
+         - [{'count': [1, 2, 4], 'kind': 'KIND_CPU'}] (single-wrapped, kind is string)
+
+         Returns empty string if not found or if instance_group is not specified.
+         """
+         if not model_config_params or "instance_group" not in model_config_params:
+             return ""
+
+         instance_group = model_config_params["instance_group"]
+
+         # Handle various nested list structures from config parsing
+         if isinstance(instance_group, list) and len(instance_group) > 0:
+             # Handle nested structure: [[ {...} ]]
+             while (
+                 isinstance(instance_group, list)
+                 and len(instance_group) > 0
+                 and isinstance(instance_group[0], list)
+             ):
+                 instance_group = instance_group[0]
+
+             # Now should have [{...}] structure
+             if (
+                 isinstance(instance_group, list)
+                 and len(instance_group) > 0
+                 and isinstance(instance_group[0], dict)
+             ):
+                 kind = instance_group[0].get("kind", "")
+                 # Handle case where kind is wrapped in a list by config parser
+                 # e.g., ['KIND_CPU'] instead of 'KIND_CPU'
+                 if isinstance(kind, list) and len(kind) > 0:
+                     kind = kind[0]
+                 if isinstance(kind, str) and kind in ("KIND_CPU", "KIND_GPU"):
+                     return kind
+
+         return ""
+
      def _create_next_model_run_config(
          self,
          model: ModelProfileSpec,
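
Unlike `_get_explicit_instance_kind` in model_profile_spec.py, which unwraps exactly one level, this version loops, so even deeper accidental nesting resolves to the same shape. A quick illustration of the loop in isolation (the triple-wrapped input is a hypothetical edge case, not one documented in the diff):

```python
def unwrap_nested_lists(value):
    # The while-loop from _extract_instance_group_kind, isolated: keep
    # peeling off list wrappers until the first element is not a list
    while isinstance(value, list) and len(value) > 0 and isinstance(value[0], list):
        value = value[0]
    return value


# Double- and triple-wrapped structures collapse to the same [{...}] shape
assert unwrap_nested_lists([[{"kind": "KIND_GPU"}]]) == [{"kind": "KIND_GPU"}]
assert unwrap_nested_lists([[[{"kind": "KIND_GPU"}]]]) == [{"kind": "KIND_GPU"}]
```
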
@@ -507,13 +558,13 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):

          perf_analyzer_config.update_config_from_profile_config(model_name, self._config)

-         concurrency = self._calculate_concurrency(dimension_values)
-
-         perf_config_params = {
-             "batch-size": DEFAULT_BATCH_SIZES,
-             "concurrency-range": concurrency,
-         }
-         perf_analyzer_config.update_config(perf_config_params)
+         if not model.is_load_specified():
+             concurrency = self._calculate_concurrency(dimension_values)
+             perf_config_params = {
+                 "batch-size": DEFAULT_BATCH_SIZES,
+                 "concurrency-range": concurrency,
+             }
+             perf_analyzer_config.update_config(perf_config_params)

          perf_analyzer_config.update_config(model.perf_analyzer_flags())
          return perf_analyzer_config
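
The effect of the `is_load_specified()` guard is ordering-sensitive: the search-driven batch-size/concurrency defaults are only written when the user has not pinned an inference load, and the user's perf_analyzer_flags are applied last either way. A sketch of the resulting flag resolution, using a plain dict merge as a stand-in for `update_config` and assuming DEFAULT_BATCH_SIZES is 1 (its value in config_defaults.py is not shown in this diff):

```python
from typing import Dict

DEFAULT_BATCH_SIZES = 1  # assumed value of the config_defaults constant


def build_perf_flags(
    load_specified: bool, sweep_concurrency: int, user_flags: Dict
) -> Dict:
    # Approximates the update_config call order in the method above
    flags: Dict = {}
    if not load_specified:
        flags.update(
            {"batch-size": DEFAULT_BATCH_SIZES, "concurrency-range": sweep_concurrency}
        )
    flags.update(user_flags)  # user flags are applied last, so they win
    return flags


# User pinned the load: the quick search no longer injects concurrency-range
assert build_perf_flags(True, 4, {"request-rate-range": "100"}) == {
    "request-rate-range": "100"
}
# No load specified: search-driven defaults are used
assert build_perf_flags(False, 4, {}) == {"batch-size": 1, "concurrency-range": 4}
```

The same guard is applied to the default-config path in the next hunk.
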
@@ -703,13 +754,13 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
              model_config.get_field("name"), self._config
          )

-         default_concurrency = self._calculate_default_concurrency(model_config)
-
-         perf_config_params = {
-             "batch-size": DEFAULT_BATCH_SIZES,
-             "concurrency-range": default_concurrency,
-         }
-         default_perf_analyzer_config.update_config(perf_config_params)
+         if not model.is_load_specified():
+             default_concurrency = self._calculate_default_concurrency(model_config)
+             perf_config_params = {
+                 "batch-size": DEFAULT_BATCH_SIZES,
+                 "concurrency-range": default_concurrency,
+             }
+             default_perf_analyzer_config.update_config(perf_config_params)

          default_perf_analyzer_config.update_config(model.perf_analyzer_flags())