triton_model_analyzer-1.48.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
model_analyzer/triton/server/__init__.py
@@ -0,0 +1,15 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
model_analyzer/triton/server/server.py
@@ -0,0 +1,76 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from abc import ABC, abstractmethod
+ from io import TextIOWrapper
+
+
+ class TritonServer(ABC):
+     """
+     Defines the interface for the objects created by
+     TritonServerFactory
+     """
+
+     @abstractmethod
+     def start(self, env=None):
+         """
+         Starts the tritonserver
+
+         Parameters
+         ----------
+         env: dict
+             The environment to set for this tritonserver launch
+         """
+
+     @abstractmethod
+     def stop(self):
+         """
+         Stops and cleans up after the server
+         """
+
+     @abstractmethod
+     def log_file(self) -> TextIOWrapper:
+         """
+         Returns the server's log file
+         """
+
+     @abstractmethod
+     def cpu_stats(self):
+         """
+         Returns the CPU memory usage and CPU available memory in MB
+         """
+
+     def update_config(self, params):
+         """
+         Update the server's arguments
+
+         Parameters
+         ----------
+         params: dict
+             keys are argument names and values are their values.
+         """
+
+         self._server_config.update_config(params)
+
+     def config(self):
+         """
+         Returns
+         -------
+         TritonServerConfig
+             This server's config
+         """
+
+         return self._server_config
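
The concrete launchers later in this diff (server_local.py and server_docker.py in the file list) subclass this interface. For orientation only, a minimal sketch of a conforming subclass might look like the following; the EchoServer name and the stubbed bodies are hypothetical illustrations, not code from the package:

import tempfile
from io import TextIOWrapper

from model_analyzer.triton.server.server import TritonServer


class EchoServer(TritonServer):
    """Toy launcher that records calls instead of starting tritonserver."""

    def __init__(self, config):
        # The base class's update_config()/config() read this attribute
        self._server_config = config
        self._log = tempfile.NamedTemporaryFile(mode="w+", suffix=".log")

    def start(self, env=None):
        # A real launcher would spawn tritonserver here, e.g. with a
        # command built from self._server_config.to_args_list()
        self._log.write(f"start(env={env})\n")

    def stop(self):
        self._log.write("stop()\n")

    def log_file(self) -> TextIOWrapper:
        return self._log

    def cpu_stats(self):
        # (used MB, available MB) pair, per the interface docstring
        return (0.0, 0.0)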
model_analyzer/triton/server/server_config.py
@@ -0,0 +1,269 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+
+
+ class TritonServerConfig:
+     """
+     A config class to set arguments to the Triton Inference
+     Server. An argument set to None will use the server default.
+     """
+
+     server_arg_keys = [
+         # Server
+         "id",
+         "exit-timeout-secs",
+         # Logging
+         "log-verbose",
+         "log-info",
+         "log-warning",
+         "log-error",
+         "log-format",
+         "log-file",
+         # Model Repository
+         "model-store",
+         "model-repository",
+         "exit-on-error",
+         "disable-auto-complete-config",
+         "strict-readiness",
+         "model-control-mode",
+         "repository-poll-secs",
+         "load-model",
+         "model-load-thread-count",
+         "model-load-retry-count",
+         "model-namespacing",
+         # HTTP
+         "allow-http",
+         "http-address",
+         "http-port",
+         "reuse-http-port",
+         "http-header-forward-pattern",
+         "http-thread-count",
+         "http-restricted-api",
+         # GRPC
+         "allow-grpc",
+         "grpc-address",
+         "grpc-port",
+         "reuse-grpc-port",
+         "grpc-header-forward-pattern",
+         "grpc-infer-allocation-pool-size",
+         "grpc-use-ssl",
+         "grpc-use-ssl-mutual",
+         "grpc-server-cert",
+         "grpc-server-key",
+         "grpc-root-cert",
+         "grpc-infer-response-compression-level",
+         "grpc-keepalive-time",
+         "grpc-keepalive-timeout",
+         "grpc-keepalive-permit-without-calls",
+         "grpc-http2-max-pings-without-data",
+         "grpc-http2-min-recv-ping-interval-without-data",
+         "grpc-http2-max-ping-strikes",
+         "grpc-max-connection-age",
+         "grpc-max-connection-age-grace",
+         "grpc-restricted-protocol",
+         # Sagemaker
+         "allow-sagemaker",
+         "sagemaker-port",
+         "sagemaker-safe-port-range",
+         "sagemaker-thread-count",
+         # Vertex
+         "allow-vertex-ai",
+         "vertex-ai-port",
+         "vertex-ai-thread-count",
+         "vertex-ai-default-model",
+         # Metrics
+         "allow-metrics",
+         "allow-gpu-metrics",
+         "allow-cpu-metrics",
+         "metrics-address",
+         "metrics-port",
+         "metrics-interval-ms",
+         "metrics-config",
+         # Tracing
+         "trace-config",
+         # Backend
+         "backend-directory",
+         "backend-config",
+         # Repository Agent
+         "repoagent-directory",
+         # Response Cache
+         "cache-config",
+         "cache-directory",
+         # Rate Limiter
+         "rate-limit",
+         "rate-limit-resource",
+         # Memory/Device Management
+         "pinned-memory-pool-byte-size",
+         "cuda-memory-pool-byte-size",
+         "cuda-virtual-address-size",
+         "min-supported-compute-capability",
+         "buffer-management-thread-count",
+         "host-policy",
+         "model-load-gpu-limit",
+         # DEPRECATED
+         "strict-model-config",
+         "response-cache-byte-size",
+         "trace-file",
+         "trace-level",
+         "trace-rate",
+         "trace-count",
+         "trace-log-frequency",
+     ]
+
+     def __init__(self):
+         """
+         Construct TritonServerConfig
+         """
+
+         self._server_args = {k: None for k in self.server_arg_keys}
+
+     @classmethod
+     def allowed_keys(cls):
+         """
+         Returns
+         -------
+         list of str
+             The keys that can be used to configure the tritonserver instance
+         """
+
+         snake_cased_keys = [key.replace("-", "_") for key in cls.server_arg_keys]
+         return cls.server_arg_keys + snake_cased_keys
+
+     def update_config(self, params=None):
+         """
+         Allows setting values from a
+         params dict
+
+         Parameters
+         ----------
+         params: dict
+             keys are allowed args to the tritonserver
+         """
+
+         if params:
+             for key in params:
+                 self[key.strip().replace("_", "-")] = params[key]
+
+     def to_cli_string(self):
+         """
+         Utility function to convert a config into a
+         string of CLI arguments to the server.
+
+         Returns
+         -------
+         str
+             the command consisting of all set arguments to
+             the tritonserver.
+             e.g. '--model-repository=/models --log-verbose=True'
+         """
+
+         return " ".join(
+             [f"--{key}={val}" for key, val in self._server_args.items() if val]
+         )
+
+     def to_args_list(self):
+         """
+         Utility function to convert a cli string into a list of arguments while
+         taking into account "smart" delimiters. Notice in the example below
+         that only the first equals sign is used as split delimiter.
+
+         Returns
+         -------
+         list
+             the list of arguments consisting of all set arguments to
+             the tritonserver.
+
+         Example:
+             input cli_string: "--model-control-mode=explicit
+             --backend-config=tensorflow,version=2"
+
+             output: ['--model-control-mode', 'explicit',
+             '--backend-config', 'tensorflow,version=2']
+         """
+         args_list = []
+         args = self.to_cli_string().split()
+         for arg in args:
+             args_list += arg.split("=", 1)
+         return args_list
+
+     def copy(self):
+         """
+         Returns
+         -------
+         TritonServerConfig
+             object that has the same args as this one
+         """
+
+         config_copy = TritonServerConfig()
+         config_copy.update_config(params=self._server_args)
+         return config_copy
+
+     def server_args(self):
+         """
+         Returns
+         -------
+         dict
+             keys are server arguments
+             values are their values
+         """
+
+         return self._server_args
+
+     def __getitem__(self, key):
+         """
+         Gets an argument's value in the config
+
+         Parameters
+         ----------
+         key : str
+             The name of the argument to the tritonserver
+
+         Returns
+         -------
+         The value that the argument is set to in this config
+         """
+
+         return self._server_args[key.strip().replace("_", "-")]
+
+     def __setitem__(self, key, value):
+         """
+         Sets an argument's value in the config
+         after checking if it is defined/supported.
+
+         Parameters
+         ----------
+         key : str
+             The name of the argument to the tritonserver
+         value : (any)
+             The value to which the argument is being set
+
+         Raises
+         ------
+         TritonModelAnalyzerException
+             If key is unsupported or undefined in the
+             config class
+         """
+
+         kebab_cased_key = key.strip().replace("_", "-")
+         if kebab_cased_key in self._server_args:
+             self._server_args[kebab_cased_key] = value
+         else:
+             raise TritonModelAnalyzerException(
+                 f"The argument '{key}' to the Triton Inference "
+                 "Server is not supported by the model analyzer."
+             )
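
Reading the class above: keys may be given in kebab-case or snake_case (update_config and __setitem__ normalize underscores to dashes), CLI order follows the insertion order of server_arg_keys, and to_args_list splits only on the first '=' so values containing '=' survive intact. A short usage sketch, with invented values for illustration:

from model_analyzer.triton.server.server_config import TritonServerConfig

config = TritonServerConfig()
config["model-repository"] = "/models"  # kebab-case key, set directly
config.update_config(
    {
        "model_control_mode": "explicit",  # snake_case is normalized
        "backend_config": "tensorflow,version=2",
    }
)

print(config.to_cli_string())
# --model-repository=/models --model-control-mode=explicit --backend-config=tensorflow,version=2

print(config.to_args_list())
# ['--model-repository', '/models', '--model-control-mode', 'explicit',
#  '--backend-config', 'tensorflow,version=2']
# only the first '=' in each token splits, so 'tensorflow,version=2' stays whole

# config["not-a-real-flag"] = 1 would raise TritonModelAnalyzerException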
model_analyzer/triton/server/server_docker.py
@@ -0,0 +1,229 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import logging
+ import tempfile
+ from io import TextIOWrapper
+ from multiprocessing.pool import ThreadPool
+ from subprocess import DEVNULL
+
+ import docker
+
+ from model_analyzer.constants import LOGGER_NAME
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+
+ from .server import TritonServer
+
+ LOCAL_HTTP_PORT = 8000
+ LOCAL_GRPC_PORT = 8001
+ LOCAL_METRICS_PORT = 8002
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+
+ class TritonServerDocker(TritonServer):
+     """
+     Concrete Implementation of TritonServer interface that runs
+     triton in a docker container.
+     """
+
+     def __init__(self, image, config, gpus, log_path, mounts, labels, shm_size, args):
+         """
+         Parameters
+         ----------
+         image : str
+             The tritonserver docker image to pull and run
+         config : TritonServerConfig
+             the config object containing arguments for this server instance
+         gpus : list of GPUDevice
+             GPU devices whose UUIDs are mounted and used in the container
+         log_path: str
+             Absolute path to the triton log file
+         mounts: list of str
+             The volumes to be mounted to the tritonserver container
+         labels: dict
+             name-value pairs used as labels to set metadata for the triton
+             docker container. (Not the same as environment variables)
+         shm_size: str
+             The size of /dev/shm for the triton docker container.
+         args: dict
+             name-value pairs of additional args for the triton docker container
+         """
+
+         self._server_config = config
+         self._docker_client = docker.from_env()
+         self._tritonserver_image = image
+         self._tritonserver_container = None
+         self._log_path = log_path
+         self._log_file = DEVNULL
+         self._mounts = mounts
+         self._labels = labels if labels else {}
+         self._gpus = gpus
+         self._shm_size = shm_size
+         self._args = args if args else {}
+
+         assert self._server_config[
+             "model-repository"
+         ], "Triton Server requires --model-repository argument to be set."
+
+         try:
+             self._docker_client.images.get(self._tritonserver_image)
+         except Exception:
+             logger.info(f"Pulling docker image {self._tritonserver_image}")
+             self._docker_client.images.pull(self._tritonserver_image)
+
+     def start(self, env=None):
+         """
+         Starts the tritonserver docker container using docker-py
+         """
+
+         # List GPUs to be mounted and used inside docker container
+         devices = []
+         if len(self._gpus):
+             devices = [
+                 docker.types.DeviceRequest(
+                     device_ids=[gpu.device_uuid() for gpu in self._gpus],
+                     capabilities=[["gpu"]],
+                 )
+             ]
+
+         # Set environment inside container.
+         # Supports only strings, and value lookups/concats
+         env_cmds = [
+             f"CUDA_VISIBLE_DEVICES={','.join([gpu.device_uuid() for gpu in self._gpus])}"
+         ]
+         if env:
+             # Set all environment variables inside the container
+             for env_variable in list(env):
+                 env_cmds.append(f"{env_variable}={env[env_variable]}")
+
+         # Mount required directories
+         volumes = {}
+         if self._mounts:
+             for volume_str in self._mounts:
+                 host_path, dest, mode = volume_str.split(":")
+                 volumes[host_path] = {"bind": dest, "mode": mode}
+
+         volumes[self._server_config["model-repository"]] = {
+             "bind": self._server_config["model-repository"],
+             "mode": "ro",
+         }
+
+         # Map ports, use config values but set to server defaults if not
+         # specified
+         server_http_port = self._server_config["http-port"] or 8000
+         server_grpc_port = self._server_config["grpc-port"] or 8001
+         server_metrics_port = self._server_config["metrics-port"] or 8002
+
+         ports = {
+             server_http_port: server_http_port,
+             server_grpc_port: server_grpc_port,
+             server_metrics_port: server_metrics_port,
+         }
+
+         # Construct run command
+         command = " ".join(
+             env_cmds + ["tritonserver", self._server_config.to_cli_string()]
+         )
+         try:
+             # Run the docker container and run the command in the container
+             self._tritonserver_container = self._docker_client.containers.run(
+                 command=f'bash -c "{command}"',
+                 init=True,
+                 image=self._tritonserver_image,
+                 device_requests=devices,
+                 volumes=volumes,
+                 labels=self._labels,
+                 ports=ports,
+                 publish_all_ports=True,
+                 tty=False,
+                 stdin_open=False,
+                 detach=True,
+                 shm_size=self._shm_size,
+                 **self._args,
+             )
+             logger.debug("Triton Server started.")
+         except docker.errors.APIError as e:
+             if e.explanation.find("port is already allocated") != -1:
+                 raise TritonModelAnalyzerException(
+                     "One of the following port(s) are already allocated: "
+                     f"{server_http_port}, {server_grpc_port}, "
+                     f"{server_metrics_port}.\n"
+                     "Change the Triton server ports using"
+                     " --triton-http-endpoint, --triton-grpc-endpoint,"
+                     " and --triton-metrics-endpoint flags."
+                 )
+             else:
+                 raise TritonModelAnalyzerException(e)
+
+         if self._log_path:
+             try:
+                 self._log_file = open(self._log_path, "a+")
+                 self._log_pool = ThreadPool(processes=1)
+                 self._log_pool.apply_async(self._logging_worker)
+             except OSError as e:
+                 raise TritonModelAnalyzerException(e)
+         else:
+             self._log_file = tempfile.NamedTemporaryFile()
+
+     def _logging_worker(self):
+         """
+         Streams container logs to
+         the log file
+         """
+
+         for chunk in self._tritonserver_container.logs(stream=True):
+             self._log_file.write(chunk.decode("utf-8"))
+
+     def stop(self):
+         """
+         Stops the tritonserver docker container
+         and cleans up docker client
+         """
+
+         if self._tritonserver_container is not None:
+             if self._log_path:
+                 if self._log_pool:
+                     self._log_pool.terminate()
+                     self._log_pool.close()
+                 if self._log_file:
+                     self._log_file.close()
+             self._tritonserver_container.stop()
+             self._tritonserver_container.remove(force=True)
+             self._tritonserver_container = None
+             logger.debug("Stopped Triton Server.")
+         self._docker_client.close()
+
+     def cpu_stats(self):
+         """
+         Returns the CPU memory usage and CPU available memory in MB
+         """
+
+         cmd = "bash -c \"pmap -x $(pgrep tritonserver) | tail -n1 | awk '{print $4}'\""
+         _, used_mem_bytes = self._tritonserver_container.exec_run(cmd=cmd, stream=False)
+         cmd = "bash -c \"free | awk '{if(NR==2)print $7}'\""
+         _, available_mem_bytes = self._tritonserver_container.exec_run(
+             cmd=cmd, stream=False
+         )
+
+         # pmap and free report kilobytes; divide by 1.0e3 to convert to MB
+         return (
+             float(used_mem_bytes.decode("utf-8")) // 1.0e3,
+             float(available_mem_bytes.decode("utf-8")) // 1.0e3,
+         )
+
+     def log_file(self) -> TextIOWrapper:
+         return self._log_file
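
These classes are normally driven by the model-analyzer CLI entrypoint rather than imported directly, but construction follows from the __init__ docstring above. A hedged sketch under stated assumptions: the image tag and paths are placeholders, a Docker daemon must be reachable, and gpus would normally hold GPUDevice objects exposing device_uuid():

from model_analyzer.triton.server.server_config import TritonServerConfig
from model_analyzer.triton.server.server_docker import TritonServerDocker

config = TritonServerConfig()
config["model-repository"] = "/abs/path/to/model_repository"  # required by the assert

server = TritonServerDocker(
    image="nvcr.io/nvidia/tritonserver:<xx.yy>-py3",  # placeholder tag
    config=config,
    gpus=[],              # empty list -> CPU only; CUDA_VISIBLE_DEVICES set to ""
    log_path="/tmp/triton.log",
    mounts=None,          # or ["host_path:container_path:mode", ...]
    labels=None,
    shm_size="1g",
    args=None,            # extra kwargs forwarded to docker-py containers.run()
)

server.start()
# ... drive requests against the default HTTP/GRPC ports (8000/8001) here ...
server.stop()

Note from the code above that stop() also closes the shared docker client, so a fresh TritonServerDocker instance is needed for each profiling run.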