triton-model-analyzer 1.49.0__py3-none-any.whl → 1.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  from copy import deepcopy
6
- from typing import List
6
+ from typing import List, Optional
7
7
 
8
8
  from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
9
9
  from model_analyzer.config.input.objects.config_model_profile_spec import (
@@ -27,15 +27,72 @@ class ModelProfileSpec(ConfigModelProfileSpec):
27
27
  client: TritonClient,
28
28
  gpus: List[GPUDevice],
29
29
  ):
30
- super().__init__(spec.model_name())
31
- self.__dict__ = deepcopy(spec.__dict__)
30
+ # Determine cpu_only based on priority (before copying spec's __dict__):
31
+ # 1) User-specified kind in instance_group (highest)
32
+ # 2) cpu_only_composing_models config
33
+ # 3) Inherited from spec (default False)
34
+ explicit_kind = self._get_explicit_instance_kind(spec)
35
+ if explicit_kind == "KIND_CPU":
36
+ cpu_only = True
37
+ elif explicit_kind == "KIND_GPU":
38
+ cpu_only = False
39
+ elif spec.model_name() in config.cpu_only_composing_models:
40
+ cpu_only = True
41
+ else:
42
+ cpu_only = spec.cpu_only()
43
+
44
+ super().__init__(spec.model_name(), cpu_only=cpu_only)
45
+
46
+ # Copy remaining attributes from spec (excluding _cpu_only which we set above)
47
+ spec_dict = deepcopy(spec.__dict__)
48
+ spec_dict.pop("_cpu_only", None)
49
+ self.__dict__.update(spec_dict)
32
50
 
33
51
  self._default_model_config = ModelConfig.create_model_config_dict(
34
52
  config, client, gpus, config.model_repository, spec.model_name()
35
53
  )
36
54
 
37
- if spec.model_name() in config.cpu_only_composing_models:
38
- self._cpu_only = True
55
+ @staticmethod
56
+ def _get_explicit_instance_kind(spec: ConfigModelProfileSpec) -> Optional[str]:
57
+ """
58
+ Check if the spec has an explicit kind specified in instance_group.
59
+
60
+ Returns the kind if explicitly specified, None otherwise.
61
+ This allows users to specify KIND_CPU or KIND_GPU directly in
62
+ model_config_parameters.instance_group instead of using the
63
+ separate cpu_only_composing_models config option.
64
+
65
+ The config parser may wrap values in lists for sweep support, so we need
66
+ to handle structures like:
67
+ - [[{'count': [1, 2, 4], 'kind': ['KIND_CPU']}]] (double-wrapped, kind is list)
68
+ - [{'count': [1, 2, 4], 'kind': 'KIND_CPU'}] (single-wrapped, kind is string)
69
+ """
70
+ model_config_params = spec.model_config_parameters()
71
+ if model_config_params is None:
72
+ return None
73
+
74
+ instance_group = model_config_params.get("instance_group")
75
+ if instance_group is None or not isinstance(instance_group, list):
76
+ return None
77
+
78
+ # instance_group structure can be doubly wrapped due to config parsing:
79
+ # [[ {'kind': ['KIND_GPU'], 'count': [1, 2, 4]} ]]
80
+ # Unwrap the nested structure if needed
81
+ if len(instance_group) > 0 and isinstance(instance_group[0], list):
82
+ instance_group = instance_group[0]
83
+
84
+ # instance_group is now a list of dicts, each potentially containing 'kind'
85
+ for ig in instance_group:
86
+ if isinstance(ig, dict):
87
+ kind = ig.get("kind")
88
+ # Handle case where kind is wrapped in a list by config parser
89
+ # e.g., ['KIND_CPU'] instead of 'KIND_CPU'
90
+ if isinstance(kind, list) and len(kind) > 0:
91
+ kind = kind[0]
92
+ if kind in ("KIND_CPU", "KIND_GPU"):
93
+ return kind
94
+
95
+ return None
39
96
 
40
97
  def get_default_config(self) -> dict:
41
98
  """Returns the default configuration for this model"""
@@ -25,6 +25,7 @@ from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
25
25
  from model_analyzer.config.run.model_run_config import ModelRunConfig
26
26
  from model_analyzer.config.run.run_config import RunConfig
27
27
  from model_analyzer.constants import LOGGER_NAME
28
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
28
29
  from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
29
30
  from model_analyzer.result.run_config_measurement import RunConfigMeasurement
30
31
  from model_analyzer.triton.model.model_config import ModelConfig
@@ -425,33 +426,53 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
425
426
  )
426
427
 
427
428
  model_config_params = deepcopy(model.model_config_parameters())
429
+
430
+ # Extract user-specified instance_group kind before removing it
431
+ instance_kind = self._extract_instance_group_kind(model_config_params)
432
+ if not instance_kind:
433
+ # Fallback to cpu_only flag
434
+ instance_kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
435
+
428
436
  if model_config_params:
437
+ # Remove parameters that are controlled by search dimensions
429
438
  model_config_params.pop("max_batch_size", None)
439
+ model_config_params.pop("instance_group", None)
430
440
 
431
- # This is guaranteed to only generate one combination (check is in config_command)
441
+ # Generate combinations from remaining parameters
442
+ # For composing models, this may include dynamic_batching settings, etc.
432
443
  param_combos = GeneratorUtils.generate_combinations(model_config_params)
433
- assert len(param_combos) == 1
434
444
 
435
- param_combo = param_combos[0]
445
+ # Top-level models must have exactly 1 combination (validated earlier)
446
+ # Composing models can have 1 combination (non-searchable params are fixed)
447
+ if len(param_combos) > 1:
448
+ raise TritonModelAnalyzerException(
449
+ f"Model {model.model_name()} has multiple parameter combinations "
450
+ f"after removing searchable parameters. This should have been caught "
451
+ f"during config validation."
452
+ )
453
+
454
+ param_combo = param_combos[0] if param_combos else {}
436
455
  else:
437
456
  param_combo = {}
438
457
 
439
- kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
458
+ # Add instance_group with count from dimension and kind from config
440
459
  instance_count = self._calculate_instance_count(dimension_values)
441
-
442
460
  param_combo["instance_group"] = [
443
461
  {
444
462
  "count": instance_count,
445
- "kind": kind,
463
+ "kind": instance_kind,
446
464
  }
447
465
  ]
448
466
 
467
+ # Add max_batch_size from dimension if applicable
449
468
  if "max_batch_size" in dimension_values:
450
469
  param_combo["max_batch_size"] = self._calculate_model_batch_size(
451
470
  dimension_values
452
471
  )
453
472
 
454
- if model.supports_dynamic_batching():
473
+ # Add default dynamic_batching if model supports it and not already specified
474
+ # Preserves user-specified dynamic_batching settings (single combinations only)
475
+ if model.supports_dynamic_batching() and "dynamic_batching" not in param_combo:
455
476
  param_combo["dynamic_batching"] = {}
456
477
 
457
478
  model_config_variant = BaseModelConfigGenerator.make_model_config_variant(
@@ -463,6 +484,48 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
463
484
 
464
485
  return model_config_variant
465
486
 
487
+ def _extract_instance_group_kind(self, model_config_params: dict) -> str:
488
+ """
489
+ Extract the 'kind' field from instance_group in model_config_parameters.
490
+
491
+ The config parser may wrap values in lists for sweep support, so we need
492
+ to handle structures like:
493
+ - [[{'count': [1, 2, 4], 'kind': ['KIND_CPU']}]] (double-wrapped, kind is list)
494
+ - [{'count': [1, 2, 4], 'kind': 'KIND_CPU'}] (single-wrapped, kind is string)
495
+
496
+ Returns empty string if not found or if instance_group is not specified.
497
+ """
498
+ if not model_config_params or "instance_group" not in model_config_params:
499
+ return ""
500
+
501
+ instance_group = model_config_params["instance_group"]
502
+
503
+ # Handle various nested list structures from config parsing
504
+ if isinstance(instance_group, list) and len(instance_group) > 0:
505
+ # Handle nested structure: [[ {...} ]]
506
+ while (
507
+ isinstance(instance_group, list)
508
+ and len(instance_group) > 0
509
+ and isinstance(instance_group[0], list)
510
+ ):
511
+ instance_group = instance_group[0]
512
+
513
+ # Now should have [{...}] structure
514
+ if (
515
+ isinstance(instance_group, list)
516
+ and len(instance_group) > 0
517
+ and isinstance(instance_group[0], dict)
518
+ ):
519
+ kind = instance_group[0].get("kind", "")
520
+ # Handle case where kind is wrapped in a list by config parser
521
+ # e.g., ['KIND_CPU'] instead of 'KIND_CPU'
522
+ if isinstance(kind, list) and len(kind) > 0:
523
+ kind = kind[0]
524
+ if isinstance(kind, str) and kind in ("KIND_CPU", "KIND_GPU"):
525
+ return kind
526
+
527
+ return ""
528
+
466
529
  def _create_next_model_run_config(
467
530
  self,
468
531
  model: ModelProfileSpec,
@@ -1,19 +1,9 @@
1
1
  #!/usr/bin/env python3
2
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
2
4
 
3
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
-
5
+ import logging
6
+ import math
17
7
  from typing import Dict, List
18
8
 
19
9
  from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
@@ -25,7 +15,7 @@ from model_analyzer.config.input.config_command_profile import ConfigCommandProf
25
15
  from model_analyzer.config.input.objects.config_model_profile_spec import (
26
16
  ConfigModelProfileSpec,
27
17
  )
28
- from model_analyzer.constants import MIN_INITIALIZED, RADIUS
18
+ from model_analyzer.constants import LOGGER_NAME, MIN_INITIALIZED, RADIUS
29
19
  from model_analyzer.device.gpu_device import GPUDevice
30
20
  from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
31
21
  from model_analyzer.result.result_manager import ResultManager
@@ -47,6 +37,8 @@ from .search_config import SearchConfig
47
37
  from .search_dimension import SearchDimension
48
38
  from .search_dimensions import SearchDimensions
49
39
 
40
+ logger = logging.getLogger(LOGGER_NAME)
41
+
50
42
 
51
43
  class RunConfigGeneratorFactory:
52
44
  """
@@ -221,9 +213,7 @@ class RunConfigGeneratorFactory:
221
213
  if model.is_ensemble():
222
214
  continue
223
215
 
224
- dims = RunConfigGeneratorFactory._get_dimensions_for_model(
225
- model.supports_batching()
226
- )
216
+ dims = RunConfigGeneratorFactory._get_dimensions_for_model(model)
227
217
  dimensions.add_dimensions(index, dims)
228
218
  index += 1
229
219
 
@@ -234,14 +224,161 @@ class RunConfigGeneratorFactory:
234
224
  return search_config
235
225
 
236
226
  @staticmethod
237
- def _get_dimensions_for_model(is_batching_supported: bool) -> List[SearchDimension]:
238
- if is_batching_supported:
239
- return RunConfigGeneratorFactory._get_batching_supported_dimensions()
227
def _get_dimensions_for_model(model: ModelProfileSpec) -> List[SearchDimension]:
    """
    Build the list of search dimensions for one model.

    The instance_count dimension is constrained to the user's
    instance_group count list when one is given; otherwise it is the
    default unbounded linear dimension. When the model supports batching,
    a max_batch_size dimension is placed ahead of instance_count.
    """
    user_counts = RunConfigGeneratorFactory._get_instance_count_list(model)

    # A user-supplied count list yields a constrained dimension; without one
    # the default unbounded linear dimension is used.
    instance_dim = (
        RunConfigGeneratorFactory._create_instance_dimension_from_list(user_counts)
        if user_counts
        else SearchDimension("instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)
    )

    if not model.supports_batching():
        return [instance_dim]

    # For now, max_batch_size always uses the default exponential dimension.
    # Could be extended to support user-specified lists in the future.
    batch_dim = SearchDimension(
        "max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL
    )
    return [batch_dim, instance_dim]
261
+
262
+ @staticmethod
263
+ def _get_instance_count_list(model: ModelProfileSpec) -> List[int]:
264
+ """
265
+ Extract instance_group count list from model config parameters if specified.
266
+
267
+ Returns empty list if not specified or not a list.
268
+ """
269
+ model_config_params = model.model_config_parameters()
270
+ if not model_config_params:
271
+ return []
272
+
273
+ if "instance_group" not in model_config_params:
274
+ return []
275
+
276
+ # instance_group structure: [[ {'kind': 'KIND_GPU', 'count': [1, 2, 4]} ]]
277
+ # The outer lists are from config parsing wrapping
278
+ instance_group = model_config_params["instance_group"]
279
+
280
+ if not instance_group or not isinstance(instance_group, list):
281
+ return []
282
+
283
+ # Unwrap the nested structure
284
+ if len(instance_group) > 0 and isinstance(instance_group[0], list):
285
+ instance_group = instance_group[0]
286
+
287
+ if len(instance_group) == 0 or not isinstance(instance_group[0], dict):
288
+ return []
289
+
290
+ count = instance_group[0].get("count")
291
+ if isinstance(count, list) and len(count) > 0:
292
+ return count
293
+
294
+ return []
295
+
296
+ @staticmethod
297
+ def _create_instance_dimension_from_list(
298
+ count_list: List[int],
299
+ ) -> SearchDimension:
300
+ """
301
+ Create a SearchDimension for instance_count from a user-specified list.
302
+
303
+ For lists that are powers of 2 (e.g., [1, 2, 4, 8, 16, 32]),
304
+ uses EXPONENTIAL dimension type with appropriate min/max indexes.
305
+
306
+ For other lists, uses LINEAR dimension type with appropriate min/max.
307
+
308
+ Raises TritonModelAnalyzerException if the list is not compatible with
309
+ either LINEAR or EXPONENTIAL growth patterns.
310
+ """
311
+ if not count_list or len(count_list) == 0:
312
+ raise TritonModelAnalyzerException("Instance count list cannot be empty")
313
+
314
+ # Sort the list to check for patterns
315
+ sorted_counts = sorted(count_list)
316
+
317
+ # Check if it's powers of 2
318
+ if RunConfigGeneratorFactory._is_powers_of_two(sorted_counts):
319
+ # Use EXPONENTIAL: 2^idx gives the value
320
+ # For [1, 2, 4, 8, 16, 32]: min_idx=0 (2^0=1), max_idx=5 (2^5=32)
321
+ min_idx = int(math.log2(sorted_counts[0]))
322
+ max_idx = int(math.log2(sorted_counts[-1]))
323
+
324
+ return SearchDimension(
325
+ "instance_count",
326
+ SearchDimension.DIMENSION_TYPE_EXPONENTIAL,
327
+ min=min_idx,
328
+ max=max_idx,
329
+ )
330
+
331
+ # Check if it's a contiguous linear sequence
332
+ elif RunConfigGeneratorFactory._is_linear_sequence(sorted_counts):
333
+ # Use LINEAR: idx+1 gives the value (LINEAR starts at 1, not 0)
334
+ # For [1, 2, 3, 4]: min_idx=0 (0+1=1), max_idx=3 (3+1=4)
335
+ min_idx = sorted_counts[0] - 1
336
+ max_idx = sorted_counts[-1] - 1
337
+
338
+ return SearchDimension(
339
+ "instance_count",
340
+ SearchDimension.DIMENSION_TYPE_LINEAR,
341
+ min=min_idx,
342
+ max=max_idx,
343
+ )
344
+
240
345
  else:
241
- return RunConfigGeneratorFactory._get_batching_not_supported_dimensions()
346
+ # List is not compatible with LINEAR or EXPONENTIAL
347
+ raise TritonModelAnalyzerException(
348
+ f"Instance count list {count_list} is not compatible with Quick search mode. "
349
+ f"Lists must be either powers of 2 (e.g., [1, 2, 4, 8, 16, 32]) "
350
+ f"or a contiguous sequence (e.g., [1, 2, 3, 4, 5])."
351
+ )
352
+
353
+ @staticmethod
354
+ def _is_powers_of_two(sorted_list: List[int]) -> bool:
355
+ """Check if all values in the list are powers of 2 and form a valid sequence."""
356
+ for val in sorted_list:
357
+ if val <= 0:
358
+ return False
359
+ # Check if val is a power of 2: log2(val) should be an integer
360
+ log_val = math.log2(val)
361
+ if not log_val.is_integer():
362
+ return False
363
+
364
+ return True
365
+
366
+ @staticmethod
367
+ def _is_linear_sequence(sorted_list: List[int]) -> bool:
368
+ """Check if the list is a contiguous linear sequence."""
369
+ if len(sorted_list) < 2:
370
+ return True
371
+
372
+ # Check if values are consecutive: diff should always be 1
373
+ for i in range(1, len(sorted_list)):
374
+ if sorted_list[i] - sorted_list[i - 1] != 1:
375
+ return False
376
+
377
+ return True
242
378
 
243
379
  @staticmethod
244
380
  def _get_batching_supported_dimensions() -> List[SearchDimension]:
381
+ """Legacy method - kept for backward compatibility."""
245
382
  return [
246
383
  SearchDimension(
247
384
  f"max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL
@@ -251,6 +388,7 @@ class RunConfigGeneratorFactory:
251
388
 
252
389
  @staticmethod
253
390
  def _get_batching_not_supported_dimensions() -> List[SearchDimension]:
391
+ """Legacy method - kept for backward compatibility."""
254
392
  return [
255
393
  SearchDimension(f"instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)
256
394
  ]
@@ -306,24 +444,50 @@ class RunConfigGeneratorFactory:
306
444
  gpus: List[GPUDevice],
307
445
  ) -> List[ModelProfileSpec]:
308
446
  """
309
- Creates a list of Ensemble composing model configs based on the model
447
+ Creates a list of Ensemble composing model configs based on the model.
448
+
449
+ If user specified ensemble_composing_models configs, use those for matching models.
450
+ Otherwise, use auto-discovered configs from ensemble_scheduling.
310
451
  """
311
452
  model_config = ModelConfig.create_from_profile_spec(model, config, client, gpus)
312
453
 
313
454
  if not model_config.is_ensemble():
314
455
  return []
315
456
 
457
+ # Auto-discover composing model names from ensemble_scheduling
316
458
  ensemble_composing_model_names = model_config.get_ensemble_composing_models()
459
+ if ensemble_composing_model_names is None:
460
+ return []
317
461
 
318
- ensemble_composing_model_specs = (
319
- ConfigModelProfileSpec.model_list_to_config_model_profile_spec(
462
+ # Check if user provided configs for any of these models
463
+ user_provided_configs = {}
464
+ if config.ensemble_composing_models is not None:
465
+ for user_spec in config.ensemble_composing_models:
466
+ user_provided_configs[user_spec.model_name()] = user_spec
467
+
468
+ # Create ModelProfileSpecs, using user configs when available
469
+ ensemble_composing_model_configs = []
470
+ for model_name in ensemble_composing_model_names:
471
+ if model_name in user_provided_configs:
472
+ # Use user-provided config with model_config_parameters
473
+ model_spec = user_provided_configs[model_name]
474
+ else:
475
+ # Use auto-discovered config (just model name, no parameters)
476
+ model_spec = ConfigModelProfileSpec(model_name)
477
+
478
+ mps = ModelProfileSpec(model_spec, config, client, gpus)
479
+ ensemble_composing_model_configs.append(mps)
480
+
481
+ # Warn if user specified models that aren't in the ensemble
482
+ if user_provided_configs:
483
+ unused_models = set(user_provided_configs.keys()) - set(
320
484
  ensemble_composing_model_names
321
485
  )
322
- )
323
-
324
- ensemble_composing_model_configs = [
325
- ModelProfileSpec(ensemble_composing_model_spec, config, client, gpus)
326
- for ensemble_composing_model_spec in ensemble_composing_model_specs
327
- ]
486
+ if unused_models:
487
+ logger.warning(
488
+ f"The following models in ensemble_composing_models were not found "
489
+ f"in the ensemble '{model.model_name()}' and will be ignored: "
490
+ f"{', '.join(sorted(unused_models))}"
491
+ )
328
492
 
329
493
  return ensemble_composing_model_configs
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  from argparse import Namespace
18
6
  from copy import deepcopy
@@ -251,8 +239,18 @@ class ConfigCommand:
251
239
  ):
252
240
  return
253
241
 
242
+ # Get composing model names for validation
243
+ bls_composing = (
244
+ self._get_config_value("bls_composing_models", args, yaml_config) or []
245
+ )
246
+ cpu_only_composing = (
247
+ self._get_config_value("cpu_only_composing_models", args, yaml_config) or []
248
+ )
249
+
254
250
  self._check_per_model_parameters(profile_models)
255
- self._check_per_model_model_config_parameters(profile_models)
251
+ self._check_per_model_model_config_parameters(
252
+ profile_models, bls_composing, cpu_only_composing
253
+ )
256
254
 
257
255
  def _check_per_model_parameters(self, profile_models: Dict) -> None:
258
256
  for model in profile_models.values():
@@ -268,23 +266,81 @@ class ConfigCommand:
268
266
  "\nPlease use brute search mode or remove concurrency/batch sizes list."
269
267
  )
270
268
 
271
- def _check_per_model_model_config_parameters(self, profile_models: Dict) -> None:
272
- for model in profile_models.values():
269
+ def _check_per_model_model_config_parameters(
270
+ self, profile_models: Dict, bls_composing: List, cpu_only_composing: List
271
+ ) -> None:
272
+ for model_name, model in profile_models.items():
273
273
  if not "model_config_parameters" in model:
274
274
  continue
275
275
 
276
+ # Check if this is a composing model
277
+ is_composing = False
278
+ if bls_composing:
279
+ # bls_composing might be a list of dicts or list of strings
280
+ if isinstance(bls_composing, list):
281
+ is_composing = any(
282
+ (isinstance(m, dict) and m.get("model_name") == model_name)
283
+ or (isinstance(m, str) and m == model_name)
284
+ for m in bls_composing
285
+ )
286
+ if (
287
+ not is_composing
288
+ and cpu_only_composing
289
+ and model_name in cpu_only_composing
290
+ ):
291
+ is_composing = True
292
+
293
+ # Composing models are allowed to have these parameters with ranges
294
+ if is_composing:
295
+ continue
296
+
276
297
  if "max_batch_size" in model["model_config_parameters"]:
277
298
  raise TritonModelAnalyzerException(
278
- f"\nProfiling of models in quick search mode is not supported with lists of max batch sizes."
299
+ f"\nProfiling of top-level models in quick search mode is not supported with lists of max batch sizes."
279
300
  "\nPlease use brute search mode or remove max batch size list."
301
+ "\nNote: Composing models in ensembles/BLS can have max_batch_size ranges in Quick mode."
280
302
  )
281
303
 
282
304
  if "instance_group" in model["model_config_parameters"]:
283
305
  raise TritonModelAnalyzerException(
284
- f"\nProfiling of models in quick search mode is not supported with instance group as a model config parameter"
306
+ f"\nProfiling of top-level models in quick search mode is not supported with instance group as a model config parameter."
285
307
  "\nPlease use brute search mode or remove instance_group from 'model_config_parameters'."
308
+ "\nNote: Composing models in ensembles/BLS can have instance_group with count ranges in Quick mode."
286
309
  )
287
310
 
311
+ def _is_composing_model(self, model_name: str, config: Dict) -> bool:
312
+ """
313
+ Determine if a model is a composing model by checking:
314
+ 1. If it's in bls_composing_models list
315
+ 2. If it's in ensemble_composing_models list
316
+ 3. If it's in cpu_only_composing_models list
317
+
318
+ Note: We cannot check ensemble_scheduling at this stage because
319
+ we haven't loaded the model configs from the repository yet.
320
+ Users must explicitly list ensemble composing models in ensemble_composing_models
321
+ to enable parameter ranges for them.
322
+ """
323
+ if "bls_composing_models" in config:
324
+ bls_composing = config["bls_composing_models"].value()
325
+ if bls_composing and any(
326
+ m.model_name() == model_name for m in bls_composing
327
+ ):
328
+ return True
329
+
330
+ if "ensemble_composing_models" in config:
331
+ ensemble_composing = config["ensemble_composing_models"].value()
332
+ if ensemble_composing and any(
333
+ m.model_name() == model_name for m in ensemble_composing
334
+ ):
335
+ return True
336
+
337
+ if "cpu_only_composing_models" in config:
338
+ cpu_only_composing = config["cpu_only_composing_models"].value()
339
+ if cpu_only_composing and model_name in cpu_only_composing:
340
+ return True
341
+
342
+ return False
343
+
288
344
  def _check_quick_search_model_config_parameters_combinations(self) -> None:
289
345
  config = self.get_config()
290
346
  if not "profile_models" in config:
@@ -295,13 +351,18 @@ class ConfigCommand:
295
351
 
296
352
  profile_models = config["profile_models"].value()
297
353
  for model in profile_models:
354
+ # Composing models are allowed to have parameter ranges in Quick mode
355
+ if self._is_composing_model(model.model_name(), config):
356
+ continue
357
+
298
358
  model_config_params = deepcopy(model.model_config_parameters())
299
359
  if model_config_params:
300
360
  if len(GeneratorUtils.generate_combinations(model_config_params)) > 1:
301
361
  raise TritonModelAnalyzerException(
302
- f"\nProfiling of models in quick search mode is not supported for the specified model config parameters, "
362
+ f"\nProfiling of top-level models in quick search mode is not supported for the specified model config parameters, "
303
363
  f"as more than one combination of parameters can be generated."
304
364
  f"\nPlease use brute search mode to profile or remove the model config parameters specified."
365
+ f"\nNote: Composing models in ensembles/BLS can have parameter ranges in Quick mode."
305
366
  )
306
367
 
307
368
  def _check_bls_no_brute_search(
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import argparse
18
6
  import logging
@@ -803,6 +791,39 @@ class ConfigCommandProfile(ConfigCommand):
803
791
  description="List of the models to be profiled",
804
792
  )
805
793
  )
794
+ self._add_config(
795
+ ConfigField(
796
+ "ensemble_composing_models",
797
+ flags=["--ensemble-composing-models"],
798
+ field_type=ConfigUnion(
799
+ [
800
+ profile_model_scheme,
801
+ ConfigListGeneric(
802
+ ConfigUnion(
803
+ [
804
+ profile_model_scheme,
805
+ ConfigPrimitive(
806
+ str,
807
+ output_mapper=ConfigModelProfileSpec.model_str_to_config_model_profile_spec,
808
+ ),
809
+ ]
810
+ ),
811
+ required=True,
812
+ output_mapper=ConfigModelProfileSpec.model_mixed_to_config_model_profile_spec,
813
+ ),
814
+ ConfigListString(
815
+ output_mapper=ConfigModelProfileSpec.model_list_to_config_model_profile_spec
816
+ ),
817
+ ],
818
+ required=True,
819
+ ),
820
+ default_value=[],
821
+ description="List of ensemble composing models with optional model config parameters. "
822
+ "Composing models are auto-discovered from ensemble_scheduling. Use this option to specify "
823
+ "configurations (e.g., instance_group count ranges) for any auto-discovered models. "
824
+ "Only models found in ensemble_scheduling will be profiled; others will be ignored with a warning.",
825
+ )
826
+ )
806
827
  self._add_config(
807
828
  ConfigField(
808
829
  "cpu_only_composing_models",
@@ -1701,9 +1722,15 @@ class ConfigCommandProfile(ConfigCommand):
1701
1722
  new_profile_models[model.model_name()] = new_model
1702
1723
 
1703
1724
  # deepcopy is necessary, else it gets overwritten when updating profile_models
1725
+ # Both bls_composing_models and ensemble_composing_models share the same
1726
+ # profile_model_scheme ConfigObject, so updating profile_models would
1727
+ # overwrite their values.
1704
1728
  self._fields["bls_composing_models"] = deepcopy(
1705
1729
  self._fields["bls_composing_models"]
1706
1730
  )
1731
+ self._fields["ensemble_composing_models"] = deepcopy(
1732
+ self._fields["ensemble_composing_models"]
1733
+ )
1707
1734
  self._fields["profile_models"].set_value(new_profile_models)
1708
1735
 
1709
1736
  def _using_request_rate(self) -> bool:
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- # SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  import os
@@ -52,7 +52,7 @@ DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
52
52
  DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
53
53
  DEFAULT_DCGM_DISABLE = False
54
54
  DEFAULT_TRITON_LAUNCH_MODE = "local"
55
- DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:25.12-py3"
55
+ DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:26.01-py3"
56
56
  DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
57
57
  DEFAULT_TRITON_GRPC_ENDPOINT = "localhost:8001"
58
58
  DEFAULT_TRITON_METRICS_URL = "http://localhost:8002/metrics"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: triton-model-analyzer
3
- Version: 1.49.0
3
+ Version: 1.50.0
4
4
  Summary: Triton Model Analyzer is a tool to profile and analyze the runtime performance of one or more models on the Triton Inference Server
5
5
  Author-email: "NVIDIA Inc." <sw-dl-triton@nvidia.com>
6
6
  License-Expression: Apache-2.0
@@ -20,7 +20,7 @@ model_analyzer/config/generate/coordinate_data.py,sha256=deqoRIvO0aN5D5sNDZcjmT0
20
20
  model_analyzer/config/generate/generator_utils.py,sha256=5hj7qGzVs-oHk6UK_ggBzKJiTDVdX6MSLrXyEfU-kFE,4157
21
21
  model_analyzer/config/generate/manual_model_config_generator.py,sha256=_Y78ORijg5E6c2a2zESwJzK55S8fbhoy2rqFTQq-seA,7088
22
22
  model_analyzer/config/generate/model_config_generator_factory.py,sha256=5BjSJxRmrEVy3LyjIPnMT-QCz8GABa62oY1VvJfmhbE,3334
23
- model_analyzer/config/generate/model_profile_spec.py,sha256=fOCTNSK_GOOQ1FaRuCo_rDeVEf8H_67hUVERFlRndtM,2739
23
+ model_analyzer/config/generate/model_profile_spec.py,sha256=WSznjBRO16vwet4f3XhrjmHx3tgAQU9ZJDh8CYI4E3c,5321
24
24
  model_analyzer/config/generate/model_run_config_generator.py,sha256=Ip1djtZ3wlwG_Zs3YivIlJxmwS1TsNa_7aMCU9bCQP0,5484
25
25
  model_analyzer/config/generate/model_variant_name_manager.py,sha256=qPAcvDQhkbArkW-A1A1mdRyqALUgfLlyV3dhNV6jKgs,5362
26
26
  model_analyzer/config/generate/neighborhood.py,sha256=s9g9igetHvF6Xm9FhuzjbhPBVWO65SngABrX3RGOUBQ,19481
@@ -28,18 +28,18 @@ model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generato
28
28
  model_analyzer/config/generate/optuna_run_config_generator.py,sha256=Z09p5tZP8wl0hp9NpHXpGovv1dZZkZzH11FnuXe8HhU,33099
29
29
  model_analyzer/config/generate/perf_analyzer_config_generator.py,sha256=cCLQTV5V65QelGFMP3B4JaYvejNZj4tHJUQSZ3nTZSY,11112
30
30
  model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py,sha256=hAL6YD6hqLDL7ttOgJuDIUvEe6BpHNlYv82cQdHUo5o,4516
31
- model_analyzer/config/generate/quick_run_config_generator.py,sha256=ccs8W7Bh20pmxel5iQF92pF7pirpaBQeveQNhUwWPq0,27719
32
- model_analyzer/config/generate/run_config_generator_factory.py,sha256=KPjZGz0R-upqf7pwrtWrVRroNC-aDPeGZd63XR92zDU,13008
31
+ model_analyzer/config/generate/quick_run_config_generator.py,sha256=1JQo3gaZeC2eUNrtuMeyQzuILZX3lBlNX-UhNaXMvBY,30880
32
+ model_analyzer/config/generate/run_config_generator_factory.py,sha256=DDAsqWQoKhBA2XFb68igVX7cO1WY5xt15_hkBxX6_Cg,19453
33
33
  model_analyzer/config/generate/search_config.py,sha256=mvQaoQ9wIeNEHWJtIwXh0JujPQZqXLLXmWpI7uBHDGA,3655
34
34
  model_analyzer/config/generate/search_dimension.py,sha256=iUoh4IhhMy9zhMhaY-q9DoBEku4--4o4j8RYQfXUVVk,2375
35
35
  model_analyzer/config/generate/search_dimensions.py,sha256=_hbpnb3gUYuCVh_LLP4FMjB1sV-IKrvekpEhi2lTCIs,2618
36
36
  model_analyzer/config/generate/search_parameter.py,sha256=E-mM5bWKdPUXjT8DCmVLWuJWTs-jBC8U8ZBDmx8GGMs,1392
37
37
  model_analyzer/config/generate/search_parameters.py,sha256=knTE9Vw7lS37SuGi1hZpVb01DFLubJpzAPJSbTHlQDA,14876
38
38
  model_analyzer/config/input/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3TD9rI,647
39
- model_analyzer/config/input/config_command.py,sha256=63Y6aJrGBA-tR5eljlZSNwgFbHc4hZYd8mEOpmdrJxc,18776
40
- model_analyzer/config/input/config_command_profile.py,sha256=P5V0y_J3p6BCDCu_QBsl5AXaz_Y73y7CpiessWbuCy4,70509
39
+ model_analyzer/config/input/config_command.py,sha256=kS02U8c5AO2XNsxM8VFOMYhFaN5r8-aPm5tjFrCiyxA,21495
40
+ model_analyzer/config/input/config_command_profile.py,sha256=Rbx5x_jj0SC7r1SXEVPLkT8CXu_KX0iLzVySdOyQJ2c,72018
41
41
  model_analyzer/config/input/config_command_report.py,sha256=Igiid6XK5OB23jvXZKPI4Pg5ZsSDkXgHU-QAGOo6_-E,9535
42
- model_analyzer/config/input/config_defaults.py,sha256=DYmjHUPQxqhlhRmE2VtzLBHhdwhk-zXkMrlTJuYQGTg,6202
42
+ model_analyzer/config/input/config_defaults.py,sha256=t1hkTDqc82tNtyfr39NDEb_n2a4qtnf0GrR69BtzpAU,6202
43
43
  model_analyzer/config/input/config_enum.py,sha256=lu9cUz_MY52EcLEXxcLYWqB5If6FhsM1K8w1srzhDt4,2386
44
44
  model_analyzer/config/input/config_field.py,sha256=XlqlouNuPENEYCBLYtHxsfMACrFlcYZGkbh6yE1psNo,5156
45
45
  model_analyzer/config/input/config_list_generic.py,sha256=-4vhDxIEMPOwYcUIkMIFzxJ61mOmX6GEgGCpTfbCFsc,3355
@@ -196,9 +196,9 @@ model_analyzer/triton/server/server_config.py,sha256=3pNQAnUmSXVAO7pKLNWak7AFGm1
196
196
  model_analyzer/triton/server/server_docker.py,sha256=38e_tMniv2PBiEIh2KMxgqsMZlttL0yCrpD0lKd0r5g,8248
197
197
  model_analyzer/triton/server/server_factory.py,sha256=WMZfXFQfO9aeFxCWewQhRk9ygMcne0ershGZ75XQ5IE,11152
198
198
  model_analyzer/triton/server/server_local.py,sha256=i5t5MaDwfvWqBA3zG15GPkGiUm6I4Bb4i0d-wBP0WNs,5406
199
- triton_model_analyzer-1.49.0.dist-info/licenses/LICENSE,sha256=vs3gOjyLmy49WMe2XKf_2eGOmBYjlsw3QFKYPB-qpHw,9144
200
- triton_model_analyzer-1.49.0.dist-info/METADATA,sha256=ebuhiEKjcPklcz1N1BXDHRZRM2B_a1OyAHCGBeirh_I,2512
201
- triton_model_analyzer-1.49.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
202
- triton_model_analyzer-1.49.0.dist-info/entry_points.txt,sha256=k6Q0D76sL3qUNxe80hUThftycJAT7Sj5dBk8IjHXJoI,66
203
- triton_model_analyzer-1.49.0.dist-info/top_level.txt,sha256=mTebknhUDSE7cCVyUtpzNVQ-tEUi8zZKFvp15xpIP2c,15
204
- triton_model_analyzer-1.49.0.dist-info/RECORD,,
199
+ triton_model_analyzer-1.50.0.dist-info/licenses/LICENSE,sha256=vs3gOjyLmy49WMe2XKf_2eGOmBYjlsw3QFKYPB-qpHw,9144
200
+ triton_model_analyzer-1.50.0.dist-info/METADATA,sha256=uDYfY5KZsLOHaugxXsqh2mCc6ByjTQvg2TT_j1IStBA,2512
201
+ triton_model_analyzer-1.50.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
202
+ triton_model_analyzer-1.50.0.dist-info/entry_points.txt,sha256=k6Q0D76sL3qUNxe80hUThftycJAT7Sj5dBk8IjHXJoI,66
203
+ triton_model_analyzer-1.50.0.dist-info/top_level.txt,sha256=mTebknhUDSE7cCVyUtpzNVQ-tEUi8zZKFvp15xpIP2c,15
204
+ triton_model_analyzer-1.50.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5