triton-model-analyzer 1.48.0-py3-none-any.whl → 1.50.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +10 -14
- model_analyzer/config/generate/model_profile_spec.py +76 -19
- model_analyzer/config/generate/perf_analyzer_config_generator.py +14 -22
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +2 -16
- model_analyzer/config/generate/quick_run_config_generator.py +86 -35
- model_analyzer/config/generate/run_config_generator_factory.py +195 -31
- model_analyzer/config/input/config_command.py +81 -20
- model_analyzer/config/input/config_command_profile.py +41 -14
- model_analyzer/config/input/config_defaults.py +3 -15
- model_analyzer/perf_analyzer/perf_analyzer.py +4 -16
- model_analyzer/perf_analyzer/perf_config.py +23 -15
- model_analyzer/plots/detailed_plot.py +41 -54
- model_analyzer/record/metrics_manager.py +7 -15
- model_analyzer/record/types/gpu_free_memory.py +13 -14
- model_analyzer/record/types/gpu_total_memory.py +13 -14
- model_analyzer/record/types/gpu_used_memory.py +13 -14
- model_analyzer/result/parameter_search.py +10 -17
- model_analyzer/result/run_config_measurement.py +29 -24
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/METADATA +1 -1
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/RECORD +24 -24
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/WHEEL +1 -1
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/entry_points.txt +0 -0
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/licenses/LICENSE +0 -0
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/top_level.txt +0 -0
model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py:

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 from copy import deepcopy
@@ -132,9 +120,11 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)
         for result in top_results:
             run_config = deepcopy(result.run_config())
             model_parameters = self._get_model_parameters(model_name)
+            perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name)
             parameter_search = ParameterSearch(
                 config=self._config,
                 model_parameters=model_parameters,
+                perf_analyzer_flags=perf_analyzer_flags,
                 skip_parameter_sweep=True,
             )
             for parameter in parameter_search.search_parameters():
@@ -151,6 +141,12 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)
 
         return {}
 
+    def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict:
+        for model in self._models:
+            if model_name == model.model_name():
+                return model.perf_analyzer_flags()
+        return {}
+
     def _set_parameter(
         self, run_config: RunConfig, model_parameters: Dict, parameter: int
     ) -> RunConfig:
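This change threads each model's perf_analyzer flags into the binary parameter search, using the same linear-scan lookup pattern as the existing `_get_model_parameters`. A minimal runnable sketch of that pattern (the `_ModelStub` class and flag values below are hypothetical stand-ins for the real model specs):

```python
from typing import Dict, List


class _ModelStub:
    """Hypothetical stand-in for a configured model spec."""

    def __init__(self, name: str, flags: Dict):
        self._name = name
        self._flags = flags

    def model_name(self) -> str:
        return self._name

    def perf_analyzer_flags(self) -> Dict:
        return self._flags


def get_model_perf_analyzer_flags(models: List[_ModelStub], model_name: str) -> Dict:
    # Same linear scan as the new helper: first name match wins,
    # and an unknown model falls back to an empty flag dict.
    for model in models:
        if model_name == model.model_name():
            return model.perf_analyzer_flags()
    return {}


models = [_ModelStub("resnet50", {"measurement-interval": 10000})]
assert get_model_perf_analyzer_flags(models, "resnet50") == {"measurement-interval": 10000}
assert get_model_perf_analyzer_flags(models, "unknown") == {}
```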
model_analyzer/config/generate/model_profile_spec.py:

@@ -1,27 +1,16 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 from copy import deepcopy
-from typing import List
+from typing import List, Optional
 
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
 from model_analyzer.config.input.objects.config_model_profile_spec import (
     ConfigModelProfileSpec,
 )
 from model_analyzer.device.gpu_device import GPUDevice
+from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
 from model_analyzer.triton.client.client import TritonClient
 from model_analyzer.triton.model.model_config import ModelConfig
 
@@ -38,15 +27,72 @@ class ModelProfileSpec(ConfigModelProfileSpec):
         client: TritonClient,
         gpus: List[GPUDevice],
     ):
-
-
+        # Determine cpu_only based on priority (before copying spec's __dict__):
+        # 1) User-specified kind in instance_group (highest)
+        # 2) cpu_only_composing_models config
+        # 3) Inherited from spec (default False)
+        explicit_kind = self._get_explicit_instance_kind(spec)
+        if explicit_kind == "KIND_CPU":
+            cpu_only = True
+        elif explicit_kind == "KIND_GPU":
+            cpu_only = False
+        elif spec.model_name() in config.cpu_only_composing_models:
+            cpu_only = True
+        else:
+            cpu_only = spec.cpu_only()
+
+        super().__init__(spec.model_name(), cpu_only=cpu_only)
+
+        # Copy remaining attributes from spec (excluding _cpu_only which we set above)
+        spec_dict = deepcopy(spec.__dict__)
+        spec_dict.pop("_cpu_only", None)
+        self.__dict__.update(spec_dict)
 
         self._default_model_config = ModelConfig.create_model_config_dict(
             config, client, gpus, config.model_repository, spec.model_name()
         )
 
-
-
+    @staticmethod
+    def _get_explicit_instance_kind(spec: ConfigModelProfileSpec) -> Optional[str]:
+        """
+        Check if the spec has an explicit kind specified in instance_group.
+
+        Returns the kind if explicitly specified, None otherwise.
+        This allows users to specify KIND_CPU or KIND_GPU directly in
+        model_config_parameters.instance_group instead of using the
+        separate cpu_only_composing_models config option.
+
+        The config parser may wrap values in lists for sweep support, so we need
+        to handle structures like:
+        - [[{'count': [1, 2, 4], 'kind': ['KIND_CPU']}]] (double-wrapped, kind is list)
+        - [{'count': [1, 2, 4], 'kind': 'KIND_CPU'}] (single-wrapped, kind is string)
+        """
+        model_config_params = spec.model_config_parameters()
+        if model_config_params is None:
+            return None
+
+        instance_group = model_config_params.get("instance_group")
+        if instance_group is None or not isinstance(instance_group, list):
+            return None
+
+        # instance_group structure can be doubly wrapped due to config parsing:
+        # [[ {'kind': ['KIND_GPU'], 'count': [1, 2, 4]} ]]
+        # Unwrap the nested structure if needed
+        if len(instance_group) > 0 and isinstance(instance_group[0], list):
+            instance_group = instance_group[0]
+
+        # instance_group is now a list of dicts, each potentially containing 'kind'
+        for ig in instance_group:
+            if isinstance(ig, dict):
+                kind = ig.get("kind")
+                # Handle case where kind is wrapped in a list by config parser
+                # e.g., ['KIND_CPU'] instead of 'KIND_CPU'
+                if isinstance(kind, list) and len(kind) > 0:
+                    kind = kind[0]
+                if kind in ("KIND_CPU", "KIND_GPU"):
+                    return kind
+
+        return None
 
     def get_default_config(self) -> dict:
         """Returns the default configuration for this model"""
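The constructor now resolves `cpu_only` by a fixed priority: an explicit `kind` in `instance_group` wins, then membership in `cpu_only_composing_models`, then the flag inherited from the spec. A compact sketch of that resolution order, with hypothetical inputs:

```python
from typing import List, Optional


def resolve_cpu_only(
    explicit_kind: Optional[str],
    model_name: str,
    cpu_only_composing_models: List[str],
    spec_cpu_only: bool,
) -> bool:
    # 1) Explicit instance_group kind has the highest priority
    if explicit_kind == "KIND_CPU":
        return True
    if explicit_kind == "KIND_GPU":
        return False
    # 2) The cpu_only_composing_models config option comes next
    if model_name in cpu_only_composing_models:
        return True
    # 3) Otherwise inherit from the spec (default False)
    return spec_cpu_only


# An explicit KIND_GPU overrides cpu_only_composing_models membership.
assert resolve_cpu_only("KIND_GPU", "m", ["m"], True) is False
assert resolve_cpu_only(None, "m", ["m"], False) is True
assert resolve_cpu_only(None, "m", [], False) is False
```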
@@ -72,3 +118,14 @@ class ModelProfileSpec(ConfigModelProfileSpec):
     def is_ensemble(self) -> bool:
         """Returns true if the model is an ensemble"""
         return "ensemble_scheduling" in self._default_model_config
+
+    def is_load_specified(self) -> bool:
+        """
+        Returns true if the model's PA config has specified any of the
+        inference load args (such as concurrency). Else returns false
+        """
+        load_args = PerfAnalyzerConfig.get_inference_load_args()
+        pa_flags = self.perf_analyzer_flags()
+        if pa_flags is None:
+            return False
+        return any(e in pa_flags for e in load_args)
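`is_load_specified()` lets later stages skip injecting a default concurrency sweep when the user already pinned an inference load through perf_analyzer flags. A sketch of the check, assuming `PerfAnalyzerConfig.get_inference_load_args()` returns the load-related flag names; the exact list lives in perf_config.py, and the three names below are an assumption based on the other changes in this diff:

```python
from typing import Dict, List, Optional

# Assumed return value of PerfAnalyzerConfig.get_inference_load_args();
# the real list is defined in model_analyzer/perf_analyzer/perf_config.py.
INFERENCE_LOAD_ARGS: List[str] = [
    "concurrency-range",
    "request-rate-range",
    "request-intervals",
]


def is_load_specified(pa_flags: Optional[Dict]) -> bool:
    # Mirrors ModelProfileSpec.is_load_specified(): any load arg present
    # in the model's perf_analyzer_flags counts as user-specified load.
    if pa_flags is None:
        return False
    return any(arg in pa_flags for arg in INFERENCE_LOAD_ARGS)


assert is_load_specified({"request-intervals": "intervals.json"}) is True
assert is_load_specified({"measurement-interval": 5000}) is False
assert is_load_specified(None) is False
```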
model_analyzer/config/generate/perf_analyzer_config_generator.py:

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 from typing import Generator, List, Optional
@@ -169,10 +157,12 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
             self._parameter_results.extend(measurement)
 
     def _create_parameter_list(self) -> List[int]:
-        #
-        #
-        #
-        if self.
+        # Determines the inference load (concurrency or request-rate or request-intervals)
+        # and creates the list of values to use. If nothing is specified by the user, then
+        # concurrency will be used.
+        if "request-intervals" in self._perf_analyzer_flags:
+            return [self._perf_analyzer_flags["request-intervals"]]
+        elif self._cli_config.is_request_rate_specified(self._model_parameters):
             return self._create_request_rate_list()
         else:
             return self._create_concurrency_list()
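The generator now picks the load type with a fixed priority: `request-intervals` (taken verbatim from the user's flags), then request rate if specified, then concurrency as the default. A standalone sketch of that dispatch; the list arguments are hypothetical stand-ins for the real `_create_request_rate_list` and `_create_concurrency_list` methods:

```python
from typing import Dict, List


def create_parameter_list(
    perf_analyzer_flags: Dict,
    request_rate_specified: bool,
    request_rate_list: List[int],
    concurrency_list: List[int],
) -> List:
    # request-intervals wins outright: the single user-supplied value is
    # wrapped in a list so the sweep machinery still sees one "parameter".
    if "request-intervals" in perf_analyzer_flags:
        return [perf_analyzer_flags["request-intervals"]]
    # Otherwise sweep request rate if the user asked for it...
    if request_rate_specified:
        return request_rate_list
    # ...and fall back to a concurrency sweep by default.
    return concurrency_list


assert create_parameter_list({"request-intervals": "i.json"}, False, [], [1, 2]) == ["i.json"]
assert create_parameter_list({}, True, [16, 32], [1, 2]) == [16, 32]
assert create_parameter_list({}, False, [16, 32], [1, 2]) == [1, 2]
```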
@@ -207,7 +197,7 @@
         for params in utils.generate_parameter_combinations(
             perf_config_non_parameter_values
         ):
-
+            configs_with_inference_load = []
             for parameter in self._parameters:
                 new_perf_config = PerfAnalyzerConfig()
 
@@ -217,7 +207,9 @@
 
                 new_perf_config.update_config(params)
 
-                if self.
+                if "request-intervals" in self._perf_analyzer_flags:
+                    pass
+                elif self._cli_config.is_request_rate_specified(self._model_parameters):
                     new_perf_config.update_config({"request-rate-range": parameter})
                 else:
                     new_perf_config.update_config({"concurrency-range": parameter})
@@ -225,8 +217,8 @@
                 # User provided flags can override the search parameters
                 new_perf_config.update_config(self._perf_analyzer_flags)
 
-
-            self._configs.append(
+                configs_with_inference_load.append(new_perf_config)
+            self._configs.append(configs_with_inference_load)
 
     def _create_non_parameter_perf_config_values(self) -> dict:
         perf_config_values = {
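`self._configs` is now a list of lists: one inner list per non-parameter combination, holding the configs for every inference-load value. Note that when `request-intervals` is set, the loop deliberately writes no search parameter into the config (`pass`), because the user flag applied afterwards already carries the load. A sketch of the grouping, with plain dicts standing in for `PerfAnalyzerConfig` and the request-rate branch elided:

```python
from typing import Dict, List

parameters = [1, 2, 4]  # hypothetical sweep values
perf_analyzer_flags: Dict = {}  # user flags, applied last so they override
param_combos: List[Dict] = [{"batch-size": 1}, {"batch-size": 8}]

configs: List[List[Dict]] = []
for params in param_combos:
    configs_with_inference_load = []
    for parameter in parameters:
        new_perf_config = dict(params)  # stand-in for PerfAnalyzerConfig
        if "request-intervals" in perf_analyzer_flags:
            pass  # load comes from the user flag applied below
        else:
            new_perf_config["concurrency-range"] = parameter
        # User provided flags can override the search parameters
        new_perf_config.update(perf_analyzer_flags)
        configs_with_inference_load.append(new_perf_config)
    configs.append(configs_with_inference_load)

# Two non-parameter combos, each swept over three load values.
assert len(configs) == 2 and len(configs[0]) == 3
```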
model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py:

@@ -1,21 +1,8 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
-from copy import deepcopy
 from typing import Generator, List, Optional
 
 from model_analyzer.config.generate.concurrency_sweeper import ConcurrencySweeper
@@ -30,7 +17,6 @@ from model_analyzer.config.generate.search_config import SearchConfig
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
-from model_analyzer.result.parameter_search import ParameterSearch
 from model_analyzer.result.result_manager import ResultManager
 from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 
model_analyzer/config/generate/quick_run_config_generator.py:

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 from sys import maxsize
@@ -37,6 +25,7 @@ from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
 from model_analyzer.config.run.model_run_config import ModelRunConfig
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
+from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
 from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
 from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 from model_analyzer.triton.model.model_config import ModelConfig
@@ -437,33 +426,53 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
         )
 
         model_config_params = deepcopy(model.model_config_parameters())
+
+        # Extract user-specified instance_group kind before removing it
+        instance_kind = self._extract_instance_group_kind(model_config_params)
+        if not instance_kind:
+            # Fallback to cpu_only flag
+            instance_kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
+
         if model_config_params:
+            # Remove parameters that are controlled by search dimensions
             model_config_params.pop("max_batch_size", None)
+            model_config_params.pop("instance_group", None)
 
-            #
+            # Generate combinations from remaining parameters
+            # For composing models, this may include dynamic_batching settings, etc.
             param_combos = GeneratorUtils.generate_combinations(model_config_params)
-            assert len(param_combos) == 1
 
-
+            # Top-level models must have exactly 1 combination (validated earlier)
+            # Composing models can have 1 combination (non-searchable params are fixed)
+            if len(param_combos) > 1:
+                raise TritonModelAnalyzerException(
+                    f"Model {model.model_name()} has multiple parameter combinations "
+                    f"after removing searchable parameters. This should have been caught "
+                    f"during config validation."
+                )
+
+            param_combo = param_combos[0] if param_combos else {}
         else:
             param_combo = {}
 
-
+        # Add instance_group with count from dimension and kind from config
         instance_count = self._calculate_instance_count(dimension_values)
-
         param_combo["instance_group"] = [
             {
                 "count": instance_count,
-                "kind":
+                "kind": instance_kind,
             }
         ]
 
+        # Add max_batch_size from dimension if applicable
         if "max_batch_size" in dimension_values:
             param_combo["max_batch_size"] = self._calculate_model_batch_size(
                 dimension_values
             )
 
-        if model
+        # Add default dynamic_batching if model supports it and not already specified
+        # Preserves user-specified dynamic_batching settings (single combinations only)
+        if model.supports_dynamic_batching() and "dynamic_batching" not in param_combo:
             param_combo["dynamic_batching"] = {}
 
         model_config_variant = BaseModelConfigGenerator.make_model_config_variant(
@@ -475,6 +484,48 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
 
         return model_config_variant
 
+    def _extract_instance_group_kind(self, model_config_params: dict) -> str:
+        """
+        Extract the 'kind' field from instance_group in model_config_parameters.
+
+        The config parser may wrap values in lists for sweep support, so we need
+        to handle structures like:
+        - [[{'count': [1, 2, 4], 'kind': ['KIND_CPU']}]] (double-wrapped, kind is list)
+        - [{'count': [1, 2, 4], 'kind': 'KIND_CPU'}] (single-wrapped, kind is string)
+
+        Returns empty string if not found or if instance_group is not specified.
+        """
+        if not model_config_params or "instance_group" not in model_config_params:
+            return ""
+
+        instance_group = model_config_params["instance_group"]
+
+        # Handle various nested list structures from config parsing
+        if isinstance(instance_group, list) and len(instance_group) > 0:
+            # Handle nested structure: [[ {...} ]]
+            while (
+                isinstance(instance_group, list)
+                and len(instance_group) > 0
+                and isinstance(instance_group[0], list)
+            ):
+                instance_group = instance_group[0]
+
+            # Now should have [{...}] structure
+            if (
+                isinstance(instance_group, list)
+                and len(instance_group) > 0
+                and isinstance(instance_group[0], dict)
+            ):
+                kind = instance_group[0].get("kind", "")
+                # Handle case where kind is wrapped in a list by config parser
+                # e.g., ['KIND_CPU'] instead of 'KIND_CPU'
+                if isinstance(kind, list) and len(kind) > 0:
+                    kind = kind[0]
+                if isinstance(kind, str) and kind in ("KIND_CPU", "KIND_GPU"):
+                    return kind
+
+        return ""
+
     def _create_next_model_run_config(
         self,
         model: ModelProfileSpec,
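Because the config parser wraps sweepable values in lists, the helper has to peel off up to two layers of wrapping before it can read `kind`. A self-contained copy of the unwrapping logic, exercised on both wrapped shapes from the docstring:

```python
def extract_instance_group_kind(model_config_params: dict) -> str:
    """Standalone version of the unwrapping logic, for illustration only."""
    if not model_config_params or "instance_group" not in model_config_params:
        return ""

    instance_group = model_config_params["instance_group"]
    # Peel off nested list wrappers: [[{...}]] -> [{...}]
    while (
        isinstance(instance_group, list)
        and instance_group
        and isinstance(instance_group[0], list)
    ):
        instance_group = instance_group[0]

    if isinstance(instance_group, list) and instance_group and isinstance(instance_group[0], dict):
        kind = instance_group[0].get("kind", "")
        # kind itself may be wrapped: ['KIND_CPU'] -> 'KIND_CPU'
        if isinstance(kind, list) and kind:
            kind = kind[0]
        if isinstance(kind, str) and kind in ("KIND_CPU", "KIND_GPU"):
            return kind
    return ""


assert extract_instance_group_kind(
    {"instance_group": [[{"count": [1, 2, 4], "kind": ["KIND_CPU"]}]]}
) == "KIND_CPU"
assert extract_instance_group_kind(
    {"instance_group": [{"count": [1, 2, 4], "kind": "KIND_GPU"}]}
) == "KIND_GPU"
assert extract_instance_group_kind({}) == ""
```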
@@ -507,13 +558,13 @@
 
         perf_analyzer_config.update_config_from_profile_config(model_name, self._config)
 
-
-
-
-
-
-
-
+        if not model.is_load_specified():
+            concurrency = self._calculate_concurrency(dimension_values)
+            perf_config_params = {
+                "batch-size": DEFAULT_BATCH_SIZES,
+                "concurrency-range": concurrency,
+            }
+            perf_analyzer_config.update_config(perf_config_params)
 
         perf_analyzer_config.update_config(model.perf_analyzer_flags())
         return perf_analyzer_config
@@ -703,13 +754,13 @@
             model_config.get_field("name"), self._config
         )
 
-
-
-
-
-
-
-
+        if not model.is_load_specified():
+            default_concurrency = self._calculate_default_concurrency(model_config)
+            perf_config_params = {
+                "batch-size": DEFAULT_BATCH_SIZES,
+                "concurrency-range": default_concurrency,
+            }
+            default_perf_analyzer_config.update_config(perf_config_params)
 
         default_perf_analyzer_config.update_config(model.perf_analyzer_flags())
 
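Both config builders now inject the default batch size and a calculated concurrency only when `is_load_specified()` is false, and the model's own perf_analyzer flags are applied last either way, so user flags always win. A sketch of that ordering, with dicts standing in for `PerfAnalyzerConfig` and `DEFAULT_BATCH_SIZES = 1` assumed (the real value comes from config_defaults.py):

```python
from typing import Dict

DEFAULT_BATCH_SIZES = 1  # assumed value of the imported config default


def build_perf_config(load_specified: bool, concurrency: int, user_flags: Dict) -> Dict:
    config: Dict = {}
    if not load_specified:
        # Defaults injected only when the user gave no inference load flag
        config.update({"batch-size": DEFAULT_BATCH_SIZES, "concurrency-range": concurrency})
    # User flags always applied last, so they override any default
    config.update(user_flags)
    return config


# User pinned request-intervals: no default concurrency sweep is added.
assert build_perf_config(True, 4, {"request-intervals": "i.json"}) == {
    "request-intervals": "i.json"
}
# No load flag: the calculated concurrency and default batch size are used.
assert build_perf_config(False, 4, {}) == {"batch-size": 1, "concurrency-range": 4}
```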