PyPI - triton-model-analyzer - Versions diffs - 1.48.0__py3-none-any.whl → 1.50.0__py3-none-any.whl - Mend

triton-model-analyzer 1.48.0__py3-none-any.whl → 1.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

model_analyzer/config/generate/run_config_generator_factory.py CHANGED Viewed

@@ -1,19 +1,9 @@
 #!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
-# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+import logging
+import math
 from typing import Dict, List
 from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
@@ -25,7 +15,7 @@ from model_analyzer.config.input.config_command_profile import ConfigCommandProf
 from model_analyzer.config.input.objects.config_model_profile_spec import (
     ConfigModelProfileSpec,
 )
-from model_analyzer.constants import MIN_INITIALIZED, RADIUS
+from model_analyzer.constants import LOGGER_NAME, MIN_INITIALIZED, RADIUS
 from model_analyzer.device.gpu_device import GPUDevice
 from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
 from model_analyzer.result.result_manager import ResultManager
@@ -47,6 +37,8 @@ from .search_config import SearchConfig
 from .search_dimension import SearchDimension
 from .search_dimensions import SearchDimensions
+logger = logging.getLogger(LOGGER_NAME)
 class RunConfigGeneratorFactory:
     """
@@ -221,9 +213,7 @@ class RunConfigGeneratorFactory:
             if model.is_ensemble():
                 continue
-            dims = RunConfigGeneratorFactory._get_dimensions_for_model(
-                model.supports_batching()
-            )
+            dims = RunConfigGeneratorFactory._get_dimensions_for_model(model)
             dimensions.add_dimensions(index, dims)
             index += 1
@@ -234,14 +224,161 @@ class RunConfigGeneratorFactory:
         return search_config
     @staticmethod
-    def _get_dimensions_for_model(is_batching_supported: bool) -> List[SearchDimension]:
-        if is_batching_supported:
-            return RunConfigGeneratorFactory._get_batching_supported_dimensions()
+    def _get_dimensions_for_model(model: ModelProfileSpec) -> List[SearchDimension]:
+        """
+        Create search dimensions for a model, respecting user-specified
+        instance_group count lists if provided.
+        """
+        dims = []
+        # Check if user specified instance_group with a count list
+        instance_count_list = RunConfigGeneratorFactory._get_instance_count_list(model)
+        if instance_count_list:
+            # User specified a list - create constrained dimension
+            dim = RunConfigGeneratorFactory._create_instance_dimension_from_list(
+                instance_count_list
+            )
+            dims.append(dim)
+        else:
+            # Use default unbounded dimension
+            dims.append(
+                SearchDimension("instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)
+            )
+        # Add max_batch_size dimension if model supports batching
+        if model.supports_batching():
+            # For now, max_batch_size always uses default exponential dimension
+            # Could be extended to support user-specified lists in the future
+            dims.insert(
+                0,
+                SearchDimension(
+                    "max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL
+                ),
+            )
+        return dims
+    @staticmethod
+    def _get_instance_count_list(model: ModelProfileSpec) -> List[int]:
+        """
+        Extract instance_group count list from model config parameters if specified.
+        Returns empty list if not specified or not a list.
+        """
+        model_config_params = model.model_config_parameters()
+        if not model_config_params:
+            return []
+        if "instance_group" not in model_config_params:
+            return []
+        # instance_group structure: [[ {'kind': 'KIND_GPU', 'count': [1, 2, 4]} ]]
+        # The outer lists are from config parsing wrapping
+        instance_group = model_config_params["instance_group"]
+        if not instance_group or not isinstance(instance_group, list):
+            return []
+        # Unwrap the nested structure
+        if len(instance_group) > 0 and isinstance(instance_group[0], list):
+            instance_group = instance_group[0]
+        if len(instance_group) == 0 or not isinstance(instance_group[0], dict):
+            return []
+        count = instance_group[0].get("count")
+        if isinstance(count, list) and len(count) > 0:
+            return count
+        return []
+    @staticmethod
+    def _create_instance_dimension_from_list(
+        count_list: List[int],
+    ) -> SearchDimension:
+        """
+        Create a SearchDimension for instance_count from a user-specified list.
+        For lists that are powers of 2 (e.g., [1, 2, 4, 8, 16, 32]),
+        uses EXPONENTIAL dimension type with appropriate min/max indexes.
+        For other lists, uses LINEAR dimension type with appropriate min/max.
+        Raises TritonModelAnalyzerException if the list is not compatible with
+        either LINEAR or EXPONENTIAL growth patterns.
+        """
+        if not count_list or len(count_list) == 0:
+            raise TritonModelAnalyzerException("Instance count list cannot be empty")
+        # Sort the list to check for patterns
+        sorted_counts = sorted(count_list)
+        # Check if it's powers of 2
+        if RunConfigGeneratorFactory._is_powers_of_two(sorted_counts):
+            # Use EXPONENTIAL: 2^idx gives the value
+            # For [1, 2, 4, 8, 16, 32]: min_idx=0 (2^0=1), max_idx=5 (2^5=32)
+            min_idx = int(math.log2(sorted_counts[0]))
+            max_idx = int(math.log2(sorted_counts[-1]))
+            return SearchDimension(
+                "instance_count",
+                SearchDimension.DIMENSION_TYPE_EXPONENTIAL,
+                min=min_idx,
+                max=max_idx,
+            )
+        # Check if it's a contiguous linear sequence
+        elif RunConfigGeneratorFactory._is_linear_sequence(sorted_counts):
+            # Use LINEAR: idx+1 gives the value (LINEAR starts at 1, not 0)
+            # For [1, 2, 3, 4]: min_idx=0 (0+1=1), max_idx=3 (3+1=4)
+            min_idx = sorted_counts[0] - 1
+            max_idx = sorted_counts[-1] - 1
+            return SearchDimension(
+                "instance_count",
+                SearchDimension.DIMENSION_TYPE_LINEAR,
+                min=min_idx,
+                max=max_idx,
+            )
         else:
-            return RunConfigGeneratorFactory._get_batching_not_supported_dimensions()
+            # List is not compatible with LINEAR or EXPONENTIAL
+            raise TritonModelAnalyzerException(
+                f"Instance count list {count_list} is not compatible with Quick search mode. "
+                f"Lists must be either powers of 2 (e.g., [1, 2, 4, 8, 16, 32]) "
+                f"or a contiguous sequence (e.g., [1, 2, 3, 4, 5])."
+            )
+    @staticmethod
+    def _is_powers_of_two(sorted_list: List[int]) -> bool:
+        """Check if all values in the list are powers of 2 and form a valid sequence."""
+        for val in sorted_list:
+            if val <= 0:
+                return False
+            # Check if val is a power of 2: log2(val) should be an integer
+            log_val = math.log2(val)
+            if not log_val.is_integer():
+                return False
+        return True
+    @staticmethod
+    def _is_linear_sequence(sorted_list: List[int]) -> bool:
+        """Check if the list is a contiguous linear sequence."""
+        if len(sorted_list) < 2:
+            return True
+        # Check if values are consecutive: diff should always be 1
+        for i in range(1, len(sorted_list)):
+            if sorted_list[i] - sorted_list[i - 1] != 1:
+                return False
+        return True
     @staticmethod
     def _get_batching_supported_dimensions() -> List[SearchDimension]:
+        """Legacy method - kept for backward compatibility."""
         return [
             SearchDimension(
                 f"max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL
@@ -251,6 +388,7 @@ class RunConfigGeneratorFactory:
     @staticmethod
     def _get_batching_not_supported_dimensions() -> List[SearchDimension]:
+        """Legacy method - kept for backward compatibility."""
         return [
             SearchDimension(f"instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)
         ]
@@ -306,24 +444,50 @@ class RunConfigGeneratorFactory:
         gpus: List[GPUDevice],
     ) -> List[ModelProfileSpec]:
         """
-        Creates a list of Ensemble composing model configs based on the model
+        Creates a list of Ensemble composing model configs based on the model.
+        If user specified ensemble_composing_models configs, use those for matching models.
+        Otherwise, use auto-discovered configs from ensemble_scheduling.
         """
         model_config = ModelConfig.create_from_profile_spec(model, config, client, gpus)
         if not model_config.is_ensemble():
             return []
+        # Auto-discover composing model names from ensemble_scheduling
         ensemble_composing_model_names = model_config.get_ensemble_composing_models()
+        if ensemble_composing_model_names is None:
+            return []
-        ensemble_composing_model_specs = (
-            ConfigModelProfileSpec.model_list_to_config_model_profile_spec(
+        # Check if user provided configs for any of these models
+        user_provided_configs = {}
+        if config.ensemble_composing_models is not None:
+            for user_spec in config.ensemble_composing_models:
+                user_provided_configs[user_spec.model_name()] = user_spec
+        # Create ModelProfileSpecs, using user configs when available
+        ensemble_composing_model_configs = []
+        for model_name in ensemble_composing_model_names:
+            if model_name in user_provided_configs:
+                # Use user-provided config with model_config_parameters
+                model_spec = user_provided_configs[model_name]
+            else:
+                # Use auto-discovered config (just model name, no parameters)
+                model_spec = ConfigModelProfileSpec(model_name)
+            mps = ModelProfileSpec(model_spec, config, client, gpus)
+            ensemble_composing_model_configs.append(mps)
+        # Warn if user specified models that aren't in the ensemble
+        if user_provided_configs:
+            unused_models = set(user_provided_configs.keys()) - set(
                 ensemble_composing_model_names
             )
-        )
-        ensemble_composing_model_configs = [
-            ModelProfileSpec(ensemble_composing_model_spec, config, client, gpus)
-            for ensemble_composing_model_spec in ensemble_composing_model_specs
-        ]
+            if unused_models:
+                logger.warning(
+                    f"The following models in ensemble_composing_models were not found "
+                    f"in the ensemble '{model.model_name()}' and will be ignored: "
+                    f"{', '.join(sorted(unused_models))}"
+                )
         return ensemble_composing_model_configs

model_analyzer/config/input/config_command.py CHANGED Viewed

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 from argparse import Namespace
 from copy import deepcopy
@@ -251,8 +239,18 @@ class ConfigCommand:
         ):
             return
+        # Get composing model names for validation
+        bls_composing = (
+            self._get_config_value("bls_composing_models", args, yaml_config) or []
+        )
+        cpu_only_composing = (
+            self._get_config_value("cpu_only_composing_models", args, yaml_config) or []
+        )
         self._check_per_model_parameters(profile_models)
-        self._check_per_model_model_config_parameters(profile_models)
+        self._check_per_model_model_config_parameters(
+            profile_models, bls_composing, cpu_only_composing
+        )
     def _check_per_model_parameters(self, profile_models: Dict) -> None:
         for model in profile_models.values():
@@ -268,23 +266,81 @@ class ConfigCommand:
                     "\nPlease use brute search mode or remove concurrency/batch sizes list."
                 )
-    def _check_per_model_model_config_parameters(self, profile_models: Dict) -> None:
-        for model in profile_models.values():
+    def _check_per_model_model_config_parameters(
+        self, profile_models: Dict, bls_composing: List, cpu_only_composing: List
+    ) -> None:
+        for model_name, model in profile_models.items():
             if not "model_config_parameters" in model:
                 continue
+            # Check if this is a composing model
+            is_composing = False
+            if bls_composing:
+                # bls_composing might be a list of dicts or list of strings
+                if isinstance(bls_composing, list):
+                    is_composing = any(
+                        (isinstance(m, dict) and m.get("model_name") == model_name)
+                        or (isinstance(m, str) and m == model_name)
+                        for m in bls_composing
+                    )
+            if (
+                not is_composing
+                and cpu_only_composing
+                and model_name in cpu_only_composing
+            ):
+                is_composing = True
+            # Composing models are allowed to have these parameters with ranges
+            if is_composing:
+                continue
             if "max_batch_size" in model["model_config_parameters"]:
                 raise TritonModelAnalyzerException(
-                    f"\nProfiling of models in quick search mode is not supported with lists of max batch sizes."
+                    f"\nProfiling of top-level models in quick search mode is not supported with lists of max batch sizes."
                     "\nPlease use brute search mode or remove max batch size list."
+                    "\nNote: Composing models in ensembles/BLS can have max_batch_size ranges in Quick mode."
                 )
             if "instance_group" in model["model_config_parameters"]:
                 raise TritonModelAnalyzerException(
-                    f"\nProfiling of models in quick search mode is not supported with instance group as a model config parameter"
+                    f"\nProfiling of top-level models in quick search mode is not supported with instance group as a model config parameter."
                     "\nPlease use brute search mode or remove instance_group from 'model_config_parameters'."
+                    "\nNote: Composing models in ensembles/BLS can have instance_group with count ranges in Quick mode."
                 )
+    def _is_composing_model(self, model_name: str, config: Dict) -> bool:
+        """
+        Determine if a model is a composing model by checking:
+        1. If it's in bls_composing_models list
+        2. If it's in ensemble_composing_models list
+        3. If it's in cpu_only_composing_models list
+        Note: We cannot check ensemble_scheduling at this stage because
+        we haven't loaded the model configs from the repository yet.
+        Users must explicitly list ensemble composing models in ensemble_composing_models
+        to enable parameter ranges for them.
+        """
+        if "bls_composing_models" in config:
+            bls_composing = config["bls_composing_models"].value()
+            if bls_composing and any(
+                m.model_name() == model_name for m in bls_composing
+            ):
+                return True
+        if "ensemble_composing_models" in config:
+            ensemble_composing = config["ensemble_composing_models"].value()
+            if ensemble_composing and any(
+                m.model_name() == model_name for m in ensemble_composing
+            ):
+                return True
+        if "cpu_only_composing_models" in config:
+            cpu_only_composing = config["cpu_only_composing_models"].value()
+            if cpu_only_composing and model_name in cpu_only_composing:
+                return True
+        return False
     def _check_quick_search_model_config_parameters_combinations(self) -> None:
         config = self.get_config()
         if not "profile_models" in config:
@@ -295,13 +351,18 @@ class ConfigCommand:
         profile_models = config["profile_models"].value()
         for model in profile_models:
+            # Composing models are allowed to have parameter ranges in Quick mode
+            if self._is_composing_model(model.model_name(), config):
+                continue
             model_config_params = deepcopy(model.model_config_parameters())
             if model_config_params:
                 if len(GeneratorUtils.generate_combinations(model_config_params)) > 1:
                     raise TritonModelAnalyzerException(
-                        f"\nProfiling of models in quick search mode is not supported for the specified model config parameters, "
+                        f"\nProfiling of top-level models in quick search mode is not supported for the specified model config parameters, "
                         f"as more than one combination of parameters can be generated."
                         f"\nPlease use brute search mode to profile or remove the model config parameters specified."
+                        f"\nNote: Composing models in ensembles/BLS can have parameter ranges in Quick mode."
                     )
     def _check_bls_no_brute_search(

model_analyzer/config/input/config_command_profile.py CHANGED Viewed

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 import argparse
 import logging
@@ -803,6 +791,39 @@ class ConfigCommandProfile(ConfigCommand):
                 description="List of the models to be profiled",
             )
         )
+        self._add_config(
+            ConfigField(
+                "ensemble_composing_models",
+                flags=["--ensemble-composing-models"],
+                field_type=ConfigUnion(
+                    [
+                        profile_model_scheme,
+                        ConfigListGeneric(
+                            ConfigUnion(
+                                [
+                                    profile_model_scheme,
+                                    ConfigPrimitive(
+                                        str,
+                                        output_mapper=ConfigModelProfileSpec.model_str_to_config_model_profile_spec,
+                                    ),
+                                ]
+                            ),
+                            required=True,
+                            output_mapper=ConfigModelProfileSpec.model_mixed_to_config_model_profile_spec,
+                        ),
+                        ConfigListString(
+                            output_mapper=ConfigModelProfileSpec.model_list_to_config_model_profile_spec
+                        ),
+                    ],
+                    required=True,
+                ),
+                default_value=[],
+                description="List of ensemble composing models with optional model config parameters. "
+                "Composing models are auto-discovered from ensemble_scheduling. Use this option to specify "
+                "configurations (e.g., instance_group count ranges) for any auto-discovered models. "
+                "Only models found in ensemble_scheduling will be profiled; others will be ignored with a warning.",
+            )
+        )
         self._add_config(
             ConfigField(
                 "cpu_only_composing_models",
@@ -1701,9 +1722,15 @@ class ConfigCommandProfile(ConfigCommand):
             new_profile_models[model.model_name()] = new_model
         # deepcopy is necessary, else it gets overwritten when updating profile_models
+        # Both bls_composing_models and ensemble_composing_models share the same
+        # profile_model_scheme ConfigObject, so updating profile_models would
+        # overwrite their values.
         self._fields["bls_composing_models"] = deepcopy(
             self._fields["bls_composing_models"]
         )
+        self._fields["ensemble_composing_models"] = deepcopy(
+            self._fields["ensemble_composing_models"]
+        )
         self._fields["profile_models"].set_value(new_profile_models)
     def _using_request_rate(self) -> bool:

model_analyzer/config/input/config_defaults.py CHANGED Viewed

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 import os
@@ -64,7 +52,7 @@ DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
 DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
 DEFAULT_DCGM_DISABLE = False
 DEFAULT_TRITON_LAUNCH_MODE = "local"
-DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:25.11-py3"
+DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:26.01-py3"
 DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
 DEFAULT_TRITON_GRPC_ENDPOINT = "localhost:8001"
 DEFAULT_TRITON_METRICS_URL = "http://localhost:8002/metrics"

model_analyzer/perf_analyzer/perf_analyzer.py CHANGED Viewed

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 import csv
 import glob
@@ -432,8 +420,8 @@ class PerfAnalyzer:
         return cmd
     def _get_single_model_cmd(self, index):
-        if self._model_type == "LLM":
-            cmd = ["genai-perf", "-m", self._config.models_name()]
+        if self._model_type.lower() == "llm":
+            cmd = ["genai-perf", "profile", "-m", self._config.models_name()]
             cmd += self._get_genai_perf_cli_command(index).replace("=", " ").split()
             cmd += ["--"]
             cmd += (

model_analyzer/perf_analyzer/perf_config.py CHANGED Viewed

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 from typing import List
@@ -98,6 +86,13 @@ class PerfAnalyzerConfig:
         "collect-metrics",
     ]
+    # Only one of these args can be sent to PA, as each one controls the inference load in a different way
+    inference_load_args = [
+        "concurrency-range",
+        "request-rate-range",
+        "request-intervals",
+    ]
     def __init__(self):
         """
         Construct a PerfAnalyzerConfig
@@ -108,7 +103,9 @@ class PerfAnalyzerConfig:
         self._options = {
             "-m": None,
             "-x": None,
-            "-b": None,
+            # Default to batch size of 1. This would be handled by PA if unspecified,
+            # but we want to be explicit so we can properly print/track values
+            "-b": 1,
             "-u": None,
             "-i": None,
             "-f": None,
@@ -160,6 +157,16 @@ class PerfAnalyzerConfig:
         return cls.additive_args[:]
+    @classmethod
+    def get_inference_load_args(cls):
+        """
+        Returns
+        -------
+        list of str
+            The Perf Analyzer args that control the inference load
+        """
+        return cls.inference_load_args
     def update_config(self, params=None):
         """
         Allows setting values from a params dict
@@ -275,6 +282,7 @@ class PerfAnalyzerConfig:
             "batch-size": self._options["-b"],
             "concurrency-range": self._args["concurrency-range"],
             "request-rate-range": self._args["request-rate-range"],
+            "request-intervals": self._args["request-intervals"],
         }
     @classmethod

triton-model-analyzer 1.48.0__py3-none-any.whl → 1.50.0__py3-none-any.whl

triton-model-analyzer 1.48.0__py3-none-any.whl → 1.50.0__py3-none-any.whl