triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,143 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import time
18
+ from abc import ABC, abstractmethod
19
+ from multiprocessing.pool import ThreadPool
20
+
21
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
22
+
23
+
24
class Monitor(ABC):
    """
    Monitor abstract class is a parent class used for monitoring devices.

    Subclasses implement a single monitoring iteration and record
    collection; this base class owns the background thread that calls
    the iteration repeatedly at a fixed frequency.
    """

    def __init__(self, frequency, metrics):
        """
        Parameters
        ----------
        frequency : float
            How often (in seconds) the metrics should be monitored.
        metrics : list
            A list of Record objects that will be monitored.

        Raises
        ------
        TritonModelAnalyzerException
        """

        self._frequency = frequency

        # Is the background thread active
        self._thread_active = False

        # AsyncResult handle for the background loop collecting results
        self._thread = None

        # Single-worker pool that runs the monitoring loop
        self._thread_pool = ThreadPool(processes=1)
        self._metrics = metrics

    def _monitoring_loop(self):
        """
        Repeatedly run one monitoring iteration, sleeping so that
        iterations start roughly self._frequency seconds apart.
        """

        frequency = self._frequency

        while self._thread_active:
            begin = time.time()
            # Monitoring iteration implemented by each of the subclasses
            self._monitoring_iteration()

            duration = time.time() - begin
            if duration < frequency:
                time.sleep(frequency - duration)

    @abstractmethod
    def _monitoring_iteration(self):
        """
        Each of the subclasses must implement this.
        This is called to execute a single round of monitoring.
        """

        pass

    @abstractmethod
    def _collect_records(self):
        """
        This method is called to collect all the monitoring records.
        It is called in the stop_recording_metrics function after
        the background thread has stopped.

        Returns
        -------
        List of Records
            The list of records collected by the monitor
        """

        pass

    @abstractmethod
    def is_monitoring_connected(self) -> bool:
        """
        This method is called to determine if we can connect to the
        monitor

        Returns
        -------
        bool
            True if connection to the monitor was successful
        """

        pass

    def start_recording_metrics(self):
        """
        Start recording the metrics.
        """

        self._thread_active = True
        self._thread = self._thread_pool.apply_async(self._monitoring_loop)

    def stop_recording_metrics(self):
        """
        Stop recording metrics. This will stop monitoring all the metrics.

        Returns
        -------
        List of Records

        Raises
        ------
        TritonModelAnalyzerException
            If recording was never started. Any exception raised inside
            the monitoring loop is also re-raised here when the loop is
            joined.
        """

        if not self._thread_active:
            raise TritonModelAnalyzerException(
                "start_recording_metrics should be "
                "called before stop_recording_metrics"
            )

        self._thread_active = False
        # Fix: wait for the monitoring loop to observe the flag and exit
        # before collecting, so no iteration is still mutating monitor
        # state while _collect_records reads it. This also surfaces any
        # exception the loop raised instead of silently dropping it.
        self._thread.get()
        self._thread = None

        return self._collect_records()

    def destroy(self):
        """
        Cleanup threadpool resources
        """

        self._thread_pool.terminate()
        self._thread_pool.close()
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import logging
18
+
19
+ import requests
20
+ from prometheus_client.parser import text_string_to_metric_families
21
+
22
+ from model_analyzer.constants import LOGGER_NAME
23
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
24
+ from model_analyzer.record.types.gpu_free_memory import GPUFreeMemory
25
+ from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage
26
+ from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory
27
+ from model_analyzer.record.types.gpu_utilization import GPUUtilization
28
+
29
+ from .monitor import Monitor
30
+
31
+ logger = logging.getLogger(LOGGER_NAME)
32
+
33
+
34
class RemoteMonitor(Monitor):
    """
    Polls Triton's prometheus metrics endpoint over HTTP and converts
    the scraped samples into model-analyzer Record objects.
    """

    # Maps Triton metric names to the Record type created for them.
    # "nv_gpu_memory_total_bytes" intentionally maps to GPUFreeMemory:
    # free memory is derived as (total - used) in _collect_records().
    gpu_metrics = {
        "nv_gpu_utilization": GPUUtilization,
        "nv_gpu_memory_used_bytes": GPUUsedMemory,
        "nv_gpu_power_usage": GPUPowerUsage,
        "nv_gpu_memory_total_bytes": GPUFreeMemory,
    }

    def __init__(self, metrics_url, frequency, metrics):
        """
        Parameters
        ----------
        metrics_url : str
            URL of the Triton metrics endpoint to scrape.
        frequency : float
            How often (in seconds) the endpoint should be polled.
        metrics : list
            Record types to monitor; must be a subset of the values
            of gpu_metrics.

        Raises
        ------
        TritonModelAnalyzerException
            If an unsupported metric type is requested.
        """

        super().__init__(frequency, metrics)
        self._metrics_url = metrics_url
        self._metrics_responses = []

        allowed_metrics = set(self.gpu_metrics.values())
        if not set(metrics).issubset(allowed_metrics):
            unsupported_metrics = set(metrics) - allowed_metrics
            # Fix: removed the stray ']' previously appended to this message.
            raise TritonModelAnalyzerException(
                f"GPU monitoring does not currently support the following metrics: {unsupported_metrics}"
            )

    def is_monitoring_connected(self) -> bool:
        """
        Returns
        -------
        bool
            True if an HTTP GET of the metrics endpoint returns 200 OK.
        """

        try:
            status_code = requests.get(self._metrics_url, timeout=10).status_code
        except Exception:
            # Any connection/timeout failure means the monitor is unreachable.
            return False

        return status_code == requests.codes["okay"]

    def _monitoring_iteration(self):
        """
        When this function runs, it requests all the metrics
        that triton has collected and organizes them into
        the dict. This function should run as fast
        as possible
        """

        self._metrics_responses.append(
            str(requests.get(self._metrics_url, timeout=10).content, encoding="ascii")
        )

    def _collect_records(self):
        """
        This function will organize the metrics responses
        and create Records out of them
        """

        records = []

        for response in self._metrics_responses:
            metrics = text_string_to_metric_families(response)
            processed_gpu_used_memory = False
            calculate_free_memory_after_pass = False
            # NOTE(review): stays None if GPUUsedMemory was not requested;
            # requesting GPUFreeMemory alone would then crash in the
            # subtraction below — confirm callers always request used
            # memory alongside free memory.
            gpu_memory_used_bytes = None
            for metric in metrics:
                if (
                    metric.name in self.gpu_metrics
                    and self.gpu_metrics[metric.name] in self._metrics
                ):
                    for sample in metric.samples:
                        if sample.name == "nv_gpu_memory_used_bytes":
                            processed_gpu_used_memory = True
                            gpu_memory_used_bytes = sample.value
                            self._create_and_add_record(
                                records, sample, gpu_memory_used_bytes // 1.0e6
                            )
                        elif sample.name == "nv_gpu_memory_total_bytes":
                            if processed_gpu_used_memory:
                                self._create_and_add_record(
                                    records,
                                    sample,
                                    (sample.value - gpu_memory_used_bytes) // 1.0e6,
                                )
                            else:
                                # Used memory not seen yet; defer the
                                # free-memory computation to after this pass.
                                total_memory_metric = metric
                                calculate_free_memory_after_pass = True
                        elif sample.name == "nv_gpu_utilization":
                            self._create_and_add_record(
                                records, sample, sample.value * 100
                            )
                        else:
                            self._create_and_add_record(records, sample, sample.value)
            if calculate_free_memory_after_pass:
                for sample in total_memory_metric.samples:
                    self._create_and_add_record(
                        records, sample, (sample.value - gpu_memory_used_bytes) // 1.0e6
                    )

        return records

    def _create_and_add_record(self, records, sample, sample_value):
        """
        Construct a Record of the type mapped from sample.name and
        append it to records, tagged with the sample's gpu_uuid label.
        """

        records.append(
            self.gpu_metrics[sample.name](
                value=sample_value, device_uuid=sample.labels["gpu_uuid"]
            )
        )
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
18
+
19
+ from .output_writer import OutputWriter
20
+
21
+
22
class FileWriter(OutputWriter):
    """
    Output writer that sends tables either to a named file or, when no
    filename is configured, straight to stdout.
    """

    def __init__(self, filename=None):
        """
        Parameters
        ----------
        filename : str or None
            Full path of the destination file; when falsy the output is
            printed to stdout instead.
        """

        self._filename = filename

    def write(self, out, append=False):
        """
        Write the given string to the configured destination.

        Parameters
        ----------
        out : str
            Text to write.
        append : bool
            When True, append to the file instead of truncating it.
            Ignored when writing to stdout.

        Raises
        ------
        TritonModelAnalyzerException
            If opening or writing the file fails.
        """

        if not self._filename:
            # No destination file configured: emit to stdout verbatim.
            print(out, end="")
            return

        mode = "a+" if append else "w+"
        try:
            with open(self._filename, mode) as destination:
                destination.write(out)
        except OSError as err:
            raise TritonModelAnalyzerException(err)
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from abc import ABC, abstractmethod
18
+
19
+
20
class OutputWriter(ABC):
    """
    Interface that receives a table
    and writes the table to a file or stream.
    """

    @abstractmethod
    def write(self, out):
        """
        Write the given string to the destination
        (stdout, .txt, .csv etc.).

        Parameters
        ----------
        out : str
            The text that should be written out.

        Raises
        ------
        TritonModelAnalyzerException
            If an error or exception occurs while
            writing the output.
        """
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
@@ -0,0 +1,206 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
18
+
19
+
20
class GenaiPerfConfig:
    """
    A config class to set arguments to the genai_perf.
    An argument set to None will use the genai_perf's default.
    """

    # Full set of genai_perf CLI options model analyzer knows about.
    genai_perf_args = [
        "backend",
        "endpoint",
        "service-kind",
        "url",
        "expected-output-tokens",
        "input-dataset",
        "input-tokens-mean",
        "input-tokens-stddev",
        "input-type",
        "num-of-output-prompts",
        "random-seed",
        "streaming",
        "tokenizer",
    ]

    # Flag-style options: emitted without a value when set to "true".
    boolean_args = ["streaming"]

    def __init__(self):
        """
        Construct a GenaiPerfConfig with every known argument unset
        (None), so genai_perf falls back to its own defaults.
        """

        self._args = {k: None for k in self.genai_perf_args}

    @classmethod
    def allowed_keys(cls):
        """
        Returns
        -------
        list of str
            The keys that are allowed to be
            passed into perf_analyzer
        """

        return cls.genai_perf_args

    def update_config(self, params=None):
        """
        Allows setting values from a params dict

        Parameters
        ----------
        params: dict
            keys are allowed args to perf_analyzer
        """

        # isinstance instead of an exact type() check so dict subclasses
        # (e.g. OrderedDict) are accepted too.
        if params and isinstance(params, dict):
            for key in params:
                self[key] = params[key]

    @classmethod
    def from_dict(cls, genai_perf_config_dict):
        """
        Rebuild a config from a checkpointed dict representation.

        Parameters
        ----------
        genai_perf_config_dict : dict
            Serialized config; only the "_args" key is restored.
        """

        # Use cls() (not the hard-coded class name) so subclasses
        # round-trip through from_dict correctly.
        genai_perf_config = cls()
        for key in [
            "_args",
        ]:
            if key in genai_perf_config_dict:
                setattr(genai_perf_config, key, genai_perf_config_dict[key])
        return genai_perf_config

    def representation(self):
        """
        Returns
        -------
        str
            a string representation of the Genai Perf config
            that removes values which can vary between
            runs, but should be ignored when determining
            if a previous (checkpointed) run can be used
        """
        cli_string = self.to_cli_string()

        return cli_string

    def to_cli_string(self) -> str:
        """
        Utility function to convert a config into a
        string of arguments to the perf_analyzer with CLI.

        Returns
        -------
        str
            cli command string consisting of all arguments
            to the perf_analyzer set in the config, without
            the executable name.
        """

        # single dashed options, then verbose flags, then main args
        args = []
        args.extend(self._parse_options())

        return " ".join(args)

    def _parse_options(self):
        """
        Parse the genai perf args into a list of CLI tokens,
        skipping any argument that is unset.
        """
        temp_args = []
        for key, value in self._args.items():
            if key in self.boolean_args:
                temp_args = self._parse_boolean_args(key, value, temp_args)
            elif value:
                temp_args.append(f"--{key}={value}")
        return temp_args

    def _parse_boolean_args(self, key, value, temp_args):
        """
        Parse genai perf args that should not add a value to the cli string

        Raises
        ------
        TritonModelAnalyzerException
            If the value is not a string or None.
        """
        # Raise a real exception instead of assert: asserts are stripped
        # under `python -O`, silently disabling this validation.
        if type(value) not in [str, type(None)]:
            raise TritonModelAnalyzerException(
                f"Data type for arg {key} must be a (boolean) string instead of {type(value)}"
            )
        # `is not None` (identity) rather than `!= None` (equality).
        if value is not None and value.lower() == "true":
            temp_args.append(f"--{key}")
        return temp_args

    def __getitem__(self, key):
        """
        Gets an arguments value in config

        Parameters
        ----------
        key : str
            The name of the argument to the genai perf config

        Returns
        -------
        object
            The value that the argument is set to in this config

        Raises
        ------
        TritonModelAnalyzerException
            If argument not found in the config
        """

        if key in self._args:
            return self._args[key]
        else:
            raise TritonModelAnalyzerException(
                f"Key {key} does not exist in genai_perf_flags."
            )

    def __setitem__(self, key, value):
        """
        Sets an arguments value in config
        after checking if defined/supported.

        Parameters
        ----------
        key : str
            The name of the argument in genai_perf
        value : (any)
            The value to which the argument is being set

        Raises
        ------
        TritonModelAnalyzerException
            If key is unsupported or undefined in the
            config class
        """

        if key in self._args:
            self._args[key] = value
        else:
            raise TritonModelAnalyzerException(
                f"The argument '{key}' to the genai_perf "
                "is not supported by model analyzer."
            )

    def __contains__(self, key):
        """
        Returns
        -------
        True if key is in perf_config i.e. the key is a
        genai perf config argument
        """

        return key in GenaiPerfConfig.allowed_keys()