triton_model_analyzer-1.48.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,187 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import logging
+ from typing import Dict, List
+
+ from model_analyzer.config.generate.model_variant_name_manager import (
+     ModelVariantNameManager,
+ )
+ from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+ from model_analyzer.constants import DEFAULT_CONFIG_PARAMS, LOGGER_NAME
+ from model_analyzer.device.gpu_device import GPUDevice
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+ from model_analyzer.triton.client.client import TritonClient
+ from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
+
+ from .base_model_config_generator import BaseModelConfigGenerator
+ from .generator_utils import GeneratorUtils
+ from .model_profile_spec import ModelProfileSpec
+
+ logger = logging.getLogger(LOGGER_NAME)
+ from copy import deepcopy
+
+
+ class ManualModelConfigGenerator(BaseModelConfigGenerator):
+     """Given a model, generates model configs in manual search mode"""
+
+     _log_first_run = False
+
+     def __init__(
+         self,
+         config: ConfigCommandProfile,
+         gpus: List[GPUDevice],
+         model: ModelProfileSpec,
+         client: TritonClient,
+         model_variant_name_manager: ModelVariantNameManager,
+         default_only: bool,
+         early_exit_enable: bool,
+     ) -> None:
+         """
+         Parameters
+         ----------
+         config: ModelAnalyzerConfig
+         gpus: List of GPUDevices
+         model: The model to generate ModelConfigs for
+         client: TritonClient
+         model_variant_name_manager: ModelVariantNameManager
+         default_only: Bool
+             If true, only the default config will be generated
+             If false, the default config will NOT be generated
+         early_exit_enable: Bool
+             If true, the generator can early exit if throughput plateaus
+         """
+         super().__init__(
+             config,
+             gpus,
+             model,
+             client,
+             model_variant_name_manager,
+             default_only,
+             early_exit_enable,
+         )
+
+         if not ManualModelConfigGenerator._log_first_run:
+             logger.info("")
+             logger.info("Starting manual brute search")
+             logger.info("")
+             ManualModelConfigGenerator._log_first_run = True
+
+         self._search_disabled = config.run_config_search_disable
+         self._curr_config_index = 0
+         self._curr_max_batch_size_index = 0
+
+         self._max_batch_sizes = None
+         self._non_max_batch_size_param_combos: List[Dict] = []
+         self._determine_max_batch_sizes_and_param_combos()
+
+         # All configs are pregenerated in _configs[][]
+         # Indexed as follows:
+         #   _configs[_curr_config_index][_curr_max_batch_size_index]
+         #
+         self._configs = self._generate_model_config_variants()
+
+     def _done_walking(self) -> bool:
+         return len(self._configs) == self._curr_config_index
+
+     def _done_walking_max_batch_size(self) -> bool:
+         if (
+             self._max_batch_sizes is None
+             or len(self._max_batch_sizes) == self._curr_max_batch_size_index
+         ):
+             return True
+
+         if self._early_exit_enable and self._last_results_erroneous():
+             return True
+
+         if self._early_exit_enable and not self._last_results_increased_throughput():
+             self._print_max_batch_size_plateau_warning()
+             return True
+         return False
+
+     def _step(self) -> None:
+         self._step_max_batch_size()
+
+         if self._done_walking_max_batch_size():
+             self._reset_max_batch_size()
+             self._step_config()
+
+     def _reset_max_batch_size(self) -> None:
+         super()._reset_max_batch_size()
+         self._curr_max_batch_size_index = 0
+
+     def _step_config(self) -> None:
+         self._curr_config_index += 1
+
+     def _step_max_batch_size(self) -> None:
+         self._curr_max_batch_size_index += 1
+
+         last_max_throughput = self._get_last_results_max_throughput()
+         if last_max_throughput:
+             self._curr_max_batch_size_throughputs.append(last_max_throughput)
+
+     def _get_next_model_config_variant(self) -> ModelConfigVariant:
+         return self._configs[self._curr_config_index][self._curr_max_batch_size_index]
+
+     def _generate_model_config_variants(self) -> List[List[ModelConfigVariant]]:
+         """Generate all model config combinations"""
+
+         model_config_variants = []
+         for param_combo in self._non_max_batch_size_param_combos:
+             configs_with_max_batch_size = []
+             if self._max_batch_sizes:
+                 for mbs in self._max_batch_sizes:
+                     param_combo["max_batch_size"] = mbs
+                     model_config_variant = self._make_direct_mode_model_config_variant(
+                         param_combo
+                     )
+                     configs_with_max_batch_size.append(model_config_variant)
+             else:
+                 model_config_variant = self._make_direct_mode_model_config_variant(
+                     param_combo
+                 )
+                 configs_with_max_batch_size.append(model_config_variant)
+
+             model_config_variants.append(configs_with_max_batch_size)
+
+         return model_config_variants
+
+     def _determine_max_batch_sizes_and_param_combos(self) -> None:
+         """
+         Determine self._max_batch_sizes and self._non_max_batch_size_param_combos
+         """
+         if self._default_only:
+             self._non_max_batch_size_param_combos = [DEFAULT_CONFIG_PARAMS]
+         else:
+             model_config_params = deepcopy(self._base_model.model_config_parameters())
+             if model_config_params:
+                 self._max_batch_sizes = model_config_params.pop("max_batch_size", None)
+                 self._non_max_batch_size_param_combos = (
+                     GeneratorUtils.generate_combinations(model_config_params)
+                 )
+             else:
+                 if self._search_disabled:
+                     self._non_max_batch_size_param_combos = (
+                         self._generate_search_disabled_param_combos()
+                     )
+                 else:
+                     raise TritonModelAnalyzerException(
+                         f"Automatic search not supported in ManualModelConfigGenerator"
+                     )
+
+     def _generate_search_disabled_param_combos(self) -> List[Dict]:
+         """Return the configs when we want to search but searching is disabled"""
+         return [DEFAULT_CONFIG_PARAMS]
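
A minimal standalone sketch of the pregenerated grid described in the comments above: the non-max_batch_size parameter combinations form the rows and the swept max_batch_size values form the columns. The parameter values and the expand_combinations helper below are hypothetical and only approximate what GeneratorUtils.generate_combinations does.

    # Illustrative sketch (not part of the wheel).
    from itertools import product
    from typing import Dict, List

    def expand_combinations(params: Dict[str, list]) -> List[Dict]:
        """Cross product of every parameter's value list (toy stand-in)."""
        keys = list(params.keys())
        return [dict(zip(keys, values)) for values in product(*params.values())]

    # Hypothetical model_config_parameters from a profile config.
    model_config_params = {
        "instance_group": [{"count": 1, "kind": "KIND_GPU"}, {"count": 2, "kind": "KIND_GPU"}],
        "dynamic_batching": [{}],
    }
    max_batch_sizes = [1, 4, 16]  # popped out and swept as the inner index

    grid = [
        [dict(combo, max_batch_size=mbs) for mbs in max_batch_sizes]
        for combo in expand_combinations(model_config_params)
    ]
    # grid[config_index][max_batch_size_index] mirrors _configs[][]:
    # 2 parameter combos x 3 batch sizes = 6 candidate model configs.
    print(len(grid), len(grid[0]))  # -> 2 3
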
@@ -0,0 +1,92 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import List
+
+ from model_analyzer.config.generate.model_variant_name_manager import (
+     ModelVariantNameManager,
+ )
+ from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+ from model_analyzer.device.gpu_device import GPUDevice
+ from model_analyzer.triton.client.client import TritonClient
+
+ from .automatic_model_config_generator import AutomaticModelConfigGenerator
+ from .config_generator_interface import ConfigGeneratorInterface
+ from .manual_model_config_generator import ManualModelConfigGenerator
+ from .model_profile_spec import ModelProfileSpec
+
+
+ class ModelConfigGeneratorFactory:
+     """
+     Factory that creates the correct Config Generators
+     """
+
+     @staticmethod
+     def create_model_config_generator(
+         config: ConfigCommandProfile,
+         gpus: List[GPUDevice],
+         model: ModelProfileSpec,
+         client: TritonClient,
+         model_variant_name_manager: ModelVariantNameManager,
+         default_only: bool,
+         early_exit_enable: bool,
+     ) -> ConfigGeneratorInterface:
+         """
+         Parameters
+         ----------
+         config: ConfigCommandProfile
+             The Model Analyzer config file for the profile step
+         gpus: List of GPUDevices
+         model: ConfigModelProfileSpec
+             The model to generate ModelRunConfigs for
+         client: TritonClient
+             The client handle used to send requests to Triton
+         model_variant_name_manager: ModelVariantNameManager
+             Used to manage the model variant names
+         default_only: Bool
+             If true, only the default config will be generated by the created generator
+             If false, the default config will NOT be generated by the created generator
+         early_exit_enable: Bool
+             If true, the created generator can early exit if throughput plateaus
+
+         Returns
+         -------
+         A generator that implements ConfigGeneratorInterface and creates ModelConfigs
+         """
+
+         search_disabled = config.run_config_search_disable
+         model_config_params = model.model_config_parameters()
+
+         if search_disabled or model_config_params:
+             return ManualModelConfigGenerator(
+                 config,
+                 gpus,
+                 model,
+                 client,
+                 model_variant_name_manager,
+                 default_only,
+                 early_exit_enable,
+             )
+         else:
+             return AutomaticModelConfigGenerator(
+                 config,
+                 gpus,
+                 model,
+                 client,
+                 model_variant_name_manager,
+                 default_only,
+                 early_exit_enable,
+             )
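
A toy illustration of the routing rule the factory applies: manual search when the user pins model_config_parameters or disables the config search, automatic search otherwise. The stub objects and the choose_generator_kind helper are hypothetical stand-ins, not part of the package.

    # Illustrative sketch (not part of the wheel).
    from types import SimpleNamespace

    def choose_generator_kind(config, model) -> str:
        """Mirrors the predicate in create_model_config_generator."""
        if config.run_config_search_disable or model.model_config_parameters():
            return "manual"
        return "automatic"

    config = SimpleNamespace(run_config_search_disable=False)
    pinned_model = SimpleNamespace(model_config_parameters=lambda: {"max_batch_size": [4, 8]})
    open_model = SimpleNamespace(model_config_parameters=lambda: None)

    print(choose_generator_kind(config, pinned_model))  # -> manual
    print(choose_generator_kind(config, open_model))    # -> automatic
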
@@ -0,0 +1,74 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from copy import deepcopy
+ from typing import List
+
+ from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+ from model_analyzer.config.input.objects.config_model_profile_spec import (
+     ConfigModelProfileSpec,
+ )
+ from model_analyzer.device.gpu_device import GPUDevice
+ from model_analyzer.triton.client.client import TritonClient
+ from model_analyzer.triton.model.model_config import ModelConfig
+
+
+ class ModelProfileSpec(ConfigModelProfileSpec):
+     """
+     The profile configuration and default model config for a single model to be profiled
+     """
+
+     def __init__(
+         self,
+         spec: ConfigModelProfileSpec,
+         config: ConfigCommandProfile,
+         client: TritonClient,
+         gpus: List[GPUDevice],
+     ):
+         super().__init__(spec.model_name())
+         self.__dict__ = deepcopy(spec.__dict__)
+
+         self._default_model_config = ModelConfig.create_model_config_dict(
+             config, client, gpus, config.model_repository, spec.model_name()
+         )
+
+         if spec.model_name() in config.cpu_only_composing_models:
+             self._cpu_only = True
+
+     def get_default_config(self) -> dict:
+         """Returns the default configuration for this model"""
+         return deepcopy(self._default_model_config)
+
+     def supports_batching(self) -> bool:
+         """Returns True if this model supports batching. Else False"""
+         if (
+             "max_batch_size" not in self._default_model_config
+             or self._default_model_config["max_batch_size"] == 0
+         ):
+             return False
+         return True
+
+     def supports_dynamic_batching(self) -> bool:
+         """Returns True if this model supports dynamic batching. Else False"""
+         supports_dynamic_batching = self.supports_batching()
+
+         if "sequence_batching" in self._default_model_config:
+             supports_dynamic_batching = False
+         return supports_dynamic_batching
+
+     def is_ensemble(self) -> bool:
+         """Returns true if the model is an ensemble"""
+         return "ensemble_scheduling" in self._default_model_config
@@ -0,0 +1,154 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Generator, List, Optional
+
+ from model_analyzer.config.generate.model_variant_name_manager import (
+     ModelVariantNameManager,
+ )
+ from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+ from model_analyzer.config.run.model_run_config import ModelRunConfig
+ from model_analyzer.device.gpu_device import GPUDevice
+ from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
+ from model_analyzer.result.run_config_measurement import RunConfigMeasurement
+ from model_analyzer.triton.client.client import TritonClient
+ from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
+
+ from .config_generator_interface import ConfigGeneratorInterface
+ from .model_config_generator_factory import ModelConfigGeneratorFactory
+ from .model_profile_spec import ModelProfileSpec
+ from .perf_analyzer_config_generator import PerfAnalyzerConfigGenerator
+
+
+ class ModelRunConfigGenerator(ConfigGeneratorInterface):
+     """
+     Given a model, generates all ModelRunConfigs (combination of
+     ModelConfig and PerfConfig)
+     """
+
+     def __init__(
+         self,
+         config: ConfigCommandProfile,
+         gpus: List[GPUDevice],
+         model: ModelProfileSpec,
+         client: TritonClient,
+         model_variant_name_manager: ModelVariantNameManager,
+         default_only: bool,
+     ) -> None:
+         """
+         Parameters
+         ----------
+         config: ModelAnalyzerConfig
+
+         gpus: List of GPUDevices
+
+         model: ConfigModelProfileSpec
+             The model to generate ModelRunConfigs for
+
+         client: TritonClient
+
+         model_variant_name_manager: ModelVariantNameManager
+
+         default_only: Bool
+         """
+         self._config = config
+         self._gpus = gpus
+         self._model = model
+         self._client = client
+         self._model_variant_name_manager = model_variant_name_manager
+
+         self._model_name = model.model_name()
+
+         self._model_pa_flags = model.perf_analyzer_flags()
+         self._model_parameters = model.parameters()
+         self._triton_server_env = model.triton_server_environment()
+
+         self._determine_early_exit_enables(config, model)
+
+         self._mcg = ModelConfigGeneratorFactory.create_model_config_generator(
+             self._config,
+             self._gpus,
+             model,
+             self._client,
+             self._model_variant_name_manager,
+             default_only,
+             self._mcg_early_exit_enable,
+         )
+
+         self._curr_mc_measurements: List[Optional[RunConfigMeasurement]] = []
+
+     def get_configs(self) -> Generator[ModelRunConfig, None, None]:
+         """
+         Returns
+         -------
+         ModelRunConfig
+             The next ModelRunConfig generated by this class
+         """
+         for model_config_variant in self._mcg.get_configs():
+             self._pacg = PerfAnalyzerConfigGenerator(
+                 self._config,
+                 model_config_variant.model_config.get_field("name"),
+                 self._model_pa_flags,
+                 self._model_parameters,
+                 self._pacg_early_exit_enable,
+             )
+
+             for perf_analyzer_config in self._pacg.get_configs():
+                 run_config = self._generate_model_run_config(
+                     model_config_variant, perf_analyzer_config
+                 )
+                 yield run_config
+
+             self._set_last_results_model_config_generator()
+
+     def set_last_results(
+         self, measurements: List[Optional[RunConfigMeasurement]]
+     ) -> None:
+         """
+         Given the results from the last ModelRunConfig, make decisions
+         about future configurations to generate
+
+         Parameters
+         ----------
+         measurements: List of Measurements from the last run(s)
+         """
+         self._pacg.set_last_results(measurements)
+         self._curr_mc_measurements.extend(measurements)
+
+     def _set_last_results_model_config_generator(self) -> None:
+         self._mcg.set_last_results(self._curr_mc_measurements)
+         self._curr_mc_measurements = []
+
+     def _generate_model_run_config(
+         self,
+         model_config_variant: ModelConfigVariant,
+         perf_analyzer_config: PerfAnalyzerConfig,
+     ) -> ModelRunConfig:
+         run_config = ModelRunConfig(
+             self._model_name, model_config_variant, perf_analyzer_config
+         )
+
+         return run_config
+
+     def _determine_early_exit_enables(
+         self, config: ConfigCommandProfile, model: ModelProfileSpec
+     ) -> None:
+         early_exit_enable = config.early_exit_enable
+         concurrency_specified = model.parameters()["concurrency"]
+         config_parameters_exist = model.model_config_parameters()
+
+         self._pacg_early_exit_enable = early_exit_enable or not concurrency_specified
+         self._mcg_early_exit_enable = early_exit_enable or not config_parameters_exist
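
A miniature sketch of the nested-sweep pattern above: one perf-analyzer sweep per model config variant, with measurements handed back to the inner sweep after each step and to the outer sweep once the inner sweep completes. ToySweep and the fake measurement are hypothetical stand-ins; the real generators use the fed-back results to decide when to stop early.

    # Illustrative sketch (not part of the wheel).
    from typing import Iterator, List, Tuple

    class ToySweep:
        """Hypothetical stand-in for the model config / perf analyzer generators."""

        def __init__(self, values: List):
            self._values = values
            self._results: List = []

        def get_configs(self) -> Iterator:
            yield from self._values

        def set_last_results(self, measurements: List) -> None:
            self._results.extend(measurements)

    def get_run_configs(model_sweep: ToySweep, concurrencies: List[int]) -> Iterator[Tuple]:
        for model_config in model_sweep.get_configs():
            perf_sweep = ToySweep(concurrencies)
            collected: List = []
            for concurrency in perf_sweep.get_configs():
                yield (model_config, concurrency)
                measurement = {"perf_throughput": 100.0}    # stand-in for a RunConfigMeasurement
                perf_sweep.set_last_results([measurement])  # fed back after every perf step
                collected.append(measurement)
            model_sweep.set_last_results(collected)         # fed back once per model config

    for run_config in get_run_configs(ToySweep(["cfg_default", "cfg_0"]), [1, 2, 4]):
        print(run_config)  # ("cfg_default", 1) ... ("cfg_0", 4): six run configs in total
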
@@ -0,0 +1,150 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from copy import deepcopy
+ from typing import Dict, List, Tuple
+
+ from model_analyzer.constants import DEFAULT_CONFIG_PARAMS
+ from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
+
+
+ class ModelVariantNameManager:
+     def __init__(self) -> None:
+         # Dict of {model_config_name: model_config_dict}
+         self._model_config_dicts: Dict[str, Dict] = {}
+
+         # Dict of {base_model_name: current_count_integer}
+         self._model_name_index: Dict[str, int] = {}
+
+     @classmethod
+     def from_dict(
+         cls, model_variant_name_manager_dict: Dict
+     ) -> "ModelVariantNameManager":
+         model_variant_name_manager = ModelVariantNameManager()
+
+         model_variant_name_manager._model_config_dicts = (
+             model_variant_name_manager_dict["_model_config_dicts"]
+         )
+         model_variant_name_manager._model_name_index = model_variant_name_manager_dict[
+             "_model_name_index"
+         ]
+
+         return model_variant_name_manager
+
+     @staticmethod
+     def make_ensemble_composing_model_key(
+         ensemble_model_config_variants: List[ModelConfigVariant],
+     ) -> Dict[str, str]:
+         ensemble_names = [emcv.variant_name for emcv in ensemble_model_config_variants]
+         ensemble_key = ",".join(ensemble_names)
+
+         return {"key": ensemble_key}
+
+     def get_model_variant_name(
+         self, model_name: str, model_config_dict: Dict, param_combo: Dict
+     ) -> Tuple[bool, str]:
+         """
+         Given a base model name and a dict of parameters to be applied
+         to the base model config, return if the variant already existed
+         and the name of the model variant
+
+         If the same input values are provided to this function multiple times,
+         the same value will be returned
+         """
+         return self._get_variant_name(
+             model_name, model_config_dict, is_ensemble=False, param_combo=param_combo
+         )
+
+     def get_ensemble_model_variant_name(
+         self, model_name: str, ensemble_dict: Dict
+     ) -> Tuple[bool, str]:
+         """
+         Given a base ensemble model name and a dict of ensemble composing configs,
+         return if the variant already existed and the name of the model variant
+
+         If the same input values are provided to this function multiple times,
+         the same value will be returned
+         """
+         return self._get_variant_name(model_name, ensemble_dict, is_ensemble=True)
+
+     def _get_variant_name(
+         self,
+         model_name: str,
+         config_dict: Dict,
+         is_ensemble: bool,
+         param_combo: Dict = {},
+     ) -> Tuple[bool, str]:
+         model_config_dict = self._copy_and_restore_model_config_dict_name(
+             model_name, config_dict
+         )
+
+         variant_found, model_variant_name = self._find_existing_variant(
+             model_config_dict
+         )
+
+         if is_ensemble:
+             if self._is_ensemble_default_config(config_dict):
+                 return (False, model_name + "_config_default")
+         else:
+             if self._is_default_config(param_combo):
+                 return (False, model_name + "_config_default")
+
+         if variant_found:
+             return (True, model_variant_name)
+
+         model_variant_name = self._create_new_model_variant(
+             model_name, model_config_dict
+         )
+
+         return (False, model_variant_name)
+
+     def _copy_and_restore_model_config_dict_name(
+         self, model_name: str, model_config_dict: Dict
+     ) -> Dict:
+         model_config_dict_copy = deepcopy(model_config_dict)
+         model_config_dict_copy["name"] = model_name
+
+         return model_config_dict_copy
+
+     def _find_existing_variant(self, model_config_dict: Dict) -> Tuple[bool, str]:
+         for (
+             model_config_name,
+             model_config_variant_dict,
+         ) in self._model_config_dicts.items():
+             if model_config_dict == model_config_variant_dict:
+                 return (True, model_config_name)
+
+         return (False, "")
+
+     def _is_default_config(self, param_combo: Dict) -> bool:
+         return param_combo == DEFAULT_CONFIG_PARAMS
+
+     def _is_ensemble_default_config(self, ensemble_dict: Dict) -> bool:
+         return "_config_default" in ensemble_dict["key"]
+
+     def _create_new_model_variant(
+         self, model_name: str, model_config_dict: Dict
+     ) -> str:
+         if model_name not in self._model_name_index:
+             new_index = 0
+         else:
+             new_index = self._model_name_index[model_name] + 1
+
+         self._model_name_index[model_name] = new_index
+         model_config_name = model_name + "_config_" + str(new_index)
+         self._model_config_dicts[model_config_name] = model_config_dict
+
+         return model_config_name
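
A usage sketch for the name manager, assuming the wheel above is installed; the model name, config dict, and parameter combo are made up for illustration.

    # Illustrative usage sketch (the inputs are hypothetical).
    from model_analyzer.config.generate.model_variant_name_manager import (
        ModelVariantNameManager,
    )

    manager = ModelVariantNameManager()
    config_dict = {"name": "add_sub", "max_batch_size": 8}  # made-up model config dict

    # The first request for this exact config dict mints a new variant name...
    print(manager.get_model_variant_name("add_sub", config_dict, {"max_batch_size": 8}))
    # -> (False, 'add_sub_config_0')

    # ...and repeating the same inputs returns the existing name instead of a new one.
    print(manager.get_model_variant_name("add_sub", config_dict, {"max_batch_size": 8}))
    # -> (True, 'add_sub_config_0')

    # A param_combo equal to DEFAULT_CONFIG_PARAMS would instead map to the reserved
    # "add_sub_config_default" name, per _get_variant_name above.
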