triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,887 @@
1
+ # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ ##
15
+ # Python bindings for the internal API of DCGM library (dcgm_agent.h)
16
+ ##
17
+
18
+ import model_analyzer.monitor.dcgm.dcgm_structs as dcgm_structs
19
+ import model_analyzer.monitor.dcgm.dcgm_fields as dcgm_fields
20
+ from ctypes import *
21
+ import functools
22
+
23
+
24
def ensure_byte_strings():
    """
    Build a decorator that UTF-8 encodes every ``str`` argument.

    C APIs must not be handed unicode strings, so each positional and keyword
    argument that is a ``str`` is converted to ``bytes`` before the wrapped
    function runs. All other arguments pass through untouched, and the wrapped
    function's return value is handed back unchanged.
    """

    def convert_result_from_bytes(result):
        # Recursively decodes bytes (also inside lists/tuples) back to str.
        # NOTE: the wrapper below does not call this helper.
        if isinstance(result, bytes):
            return result.decode('utf-8')
        if isinstance(result, list):
            return [convert_result_from_bytes(item) for item in result]
        if isinstance(result, tuple):
            return tuple(convert_result_from_bytes(item) for item in result)
        return result

    def _encode(value):
        # Only genuine str instances are encoded; bytes, numbers, ctypes
        # objects, etc. are forwarded as-is.
        return bytes(value, 'utf-8') if isinstance(value, str) else value

    def decorator(fn):

        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            encoded_args = tuple(_encode(arg) for arg in args)
            encoded_kwargs = {key: _encode(val) for key, val in kwargs.items()}
            return fn(*encoded_args, **encoded_kwargs)

        return wrapper

    return decorator
61
+
62
+
63
# Provides access to functions from dcgm_agent_internal
# Short alias used by the wrappers in this module to look up a DCGM library
# entry point by its symbol name, e.g. dcgmFP("dcgmInit").
dcgmFP = dcgm_structs._dcgmGetFunctionPointer
65
+
66
+
67
+ # This method is used to initialize DCGM
68
@ensure_byte_strings()
def dcgmInit():
    """
    Initialize DCGM through the library's ``dcgmInit`` entry point.

    A scratch handle is passed by reference to the call but is not returned.
    The status code is checked with ``dcgm_structs._dcgmCheckReturn`` and then
    returned to the caller.
    """
    scratch_handle = c_void_p()
    status = dcgmFP("dcgmInit")(byref(scratch_handle))
    dcgm_structs._dcgmCheckReturn(status)
    return status
75
+
76
+
77
+ # This method is used to shutdown DCGM Engine
78
@ensure_byte_strings()
def dcgmShutdown():
    """
    Shut down the DCGM engine through the library's ``dcgmShutdown`` call.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmShutdown")()
    dcgm_structs._dcgmCheckReturn(status)
    return status
84
+
85
+
86
@ensure_byte_strings()
def dcgmStartEmbedded(opMode):
    """
    Call ``dcgmStartEmbedded`` with the given operation mode.

    Returns the ``c_void_p`` handle that the library fills in, once the status
    code has been checked with ``dcgm_structs._dcgmCheckReturn``.
    """
    handle = c_void_p()
    status = dcgmFP("dcgmStartEmbedded")(opMode, byref(handle))
    dcgm_structs._dcgmCheckReturn(status)
    return handle
93
+
94
+
95
@ensure_byte_strings()
def dcgmStopEmbedded(dcgm_handle):
    """
    Call ``dcgmStopEmbedded`` for the given handle.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmStopEmbedded")(dcgm_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
101
+
102
+
103
@ensure_byte_strings()
def dcgmConnect(ip_address):
    """
    Call ``dcgmConnect`` for the given address.

    ``ip_address`` arrives as bytes when the caller passed a ``str`` (the
    decorator encodes it). Returns the ``c_void_p`` handle filled in by the
    library, after the status has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    handle = c_void_p()
    status = dcgmFP("dcgmConnect")(ip_address, byref(handle))
    dcgm_structs._dcgmCheckReturn(status)
    return handle
110
+
111
+
112
@ensure_byte_strings()
def dcgmConnect_v2(ip_address,
                   connectParams,
                   version=dcgm_structs.c_dcgmConnectV2Params_version):
    """
    Call ``dcgmConnect_v2`` with explicit connection parameters.

    ``connectParams.version`` is overwritten with ``version`` before the call,
    so the caller's structure is mutated. Returns the ``c_void_p`` handle
    filled in by the library, after the status has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    connectParams.version = version
    handle = c_void_p()
    status = dcgmFP("dcgmConnect_v2")(ip_address, byref(connectParams),
                                      byref(handle))
    dcgm_structs._dcgmCheckReturn(status)
    return handle
122
+
123
+
124
@ensure_byte_strings()
def dcgmDisconnect(dcgm_handle):
    """
    Call ``dcgmDisconnect`` for the given handle.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmDisconnect")(dcgm_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
130
+
131
+
132
@ensure_byte_strings()
def dcgmGetAllSupportedDevices(dcgm_handle):
    """
    Call ``dcgmGetAllSupportedDevices`` and return the reported GPU ids.

    A buffer of ``DCGM_MAX_NUM_DEVICES`` unsigned ints is handed to the
    library; only the first ``count`` entries (as reported by the call) are
    returned, as a Python list.
    """
    reported = c_uint()
    id_buffer = (c_uint * dcgm_structs.DCGM_MAX_NUM_DEVICES)()
    status = dcgmFP("dcgmGetAllSupportedDevices")(dcgm_handle, id_buffer,
                                                  byref(reported))
    dcgm_structs._dcgmCheckReturn(status)
    return list(id_buffer[:int(reported.value)])
141
+
142
+
143
@ensure_byte_strings()
def dcgmGetAllDevices(dcgm_handle):
    """
    Call ``dcgmGetAllDevices`` and return the reported GPU ids.

    A buffer of ``DCGM_MAX_NUM_DEVICES`` unsigned ints is handed to the
    library; only the first ``count`` entries (as reported by the call) are
    returned, as a Python list.
    """
    reported = c_uint()
    id_buffer = (c_uint * dcgm_structs.DCGM_MAX_NUM_DEVICES)()
    status = dcgmFP("dcgmGetAllDevices")(dcgm_handle, id_buffer,
                                         byref(reported))
    dcgm_structs._dcgmCheckReturn(status)
    return list(id_buffer[:int(reported.value)])
152
+
153
+
154
@ensure_byte_strings()
def dcgmGetDeviceAttributes(dcgm_handle,
                            gpuId,
                            version=dcgm_structs.dcgmDeviceAttributes_version3):
    """
    Call ``dcgmGetDeviceAttributes`` for one GPU.

    Only ``dcgmDeviceAttributes_version3`` is supported; any other ``version``
    is reported through ``_dcgmCheckReturn(DCGM_ST_VER_MISMATCH)``. Returns the
    populated ``c_dcgmDeviceAttributes_v3`` structure.
    """
    fn = dcgmFP("dcgmGetDeviceAttributes")
    if version != dcgm_structs.dcgmDeviceAttributes_version3:
        # Unsupported layout requested: surface it as a version mismatch.
        dcgm_structs._dcgmCheckReturn(dcgm_structs.DCGM_ST_VER_MISMATCH)
    else:
        device_values = dcgm_structs.c_dcgmDeviceAttributes_v3()
        device_values.version = dcgm_structs.dcgmDeviceAttributes_version3

    status = fn(dcgm_handle, c_int(gpuId), byref(device_values))
    dcgm_structs._dcgmCheckReturn(status)
    return device_values
168
+
169
+
170
@ensure_byte_strings()
def dcgmGetEntityGroupEntities(dcgm_handle, entityGroup, flags):
    """
    Call ``dcgmGetEntityGroupEntities`` and return the reported entity ids.

    The count argument is initialised to the buffer capacity
    (``DCGM_GROUP_MAX_ENTITIES``) and read back after the call; the first
    ``count`` ids of the buffer are returned.
    """
    max_entities = dcgm_structs.DCGM_GROUP_MAX_ENTITIES
    reported = c_int32(max_entities)
    id_buffer = (c_uint32 * max_entities)()
    status = dcgmFP("dcgmGetEntityGroupEntities")(dcgm_handle, entityGroup,
                                                  id_buffer, byref(reported),
                                                  flags)
    dcgm_structs._dcgmCheckReturn(status)
    return id_buffer[:int(reported.value)]
180
+
181
+
182
@ensure_byte_strings()
def dcgmGetNvLinkLinkStatus(dcgm_handle):
    """
    Call ``dcgmGetNvLinkLinkStatus`` and return the populated
    ``c_dcgmNvLinkStatus_v3`` structure.
    """
    link_status = dcgm_structs.c_dcgmNvLinkStatus_v3()
    link_status.version = dcgm_structs.dcgmNvLinkStatus_version3
    status = dcgmFP("dcgmGetNvLinkLinkStatus")(dcgm_handle, byref(link_status))
    dcgm_structs._dcgmCheckReturn(status)
    return link_status
190
+
191
+
192
@ensure_byte_strings()
def dcgmGetGpuInstanceHierarchy(dcgm_handle):
    """
    Call ``dcgmGetGpuInstanceHierarchy`` and return the populated
    ``c_dcgmMigHierarchy_v2`` structure.
    """
    mig_hierarchy = dcgm_structs.c_dcgmMigHierarchy_v2()
    mig_hierarchy.version = dcgm_structs.c_dcgmMigHierarchy_version2
    status = dcgmFP("dcgmGetGpuInstanceHierarchy")(dcgm_handle,
                                                   byref(mig_hierarchy))
    dcgm_structs._dcgmCheckReturn(status)
    return mig_hierarchy
200
+
201
+
202
@ensure_byte_strings()
def dcgmCreateMigEntity(dcgm_handle, parentId, profile, createOption, flags):
    """
    Call ``dcgmCreateMigEntity`` with a ``c_dcgmCreateMigEntity_v1`` request
    built from the arguments.

    Returns ``None``; the status code is only checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    fn = dcgmFP("dcgmCreateMigEntity")
    request = dcgm_structs.c_dcgmCreateMigEntity_v1()
    request.version = dcgm_structs.c_dcgmCreateMigEntity_version1
    request.parentId = parentId
    request.createOption = createOption
    request.profile = profile
    request.flags = flags
    dcgm_structs._dcgmCheckReturn(fn(dcgm_handle, byref(request)))
213
+
214
+
215
@ensure_byte_strings()
def dcgmDeleteMigEntity(dcgm_handle, entityGroupId, entityId, flags):
    """
    Call ``dcgmDeleteMigEntity`` with a ``c_dcgmDeleteMigEntity_v1`` request
    built from the arguments.

    Returns ``None``; the status code is only checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    fn = dcgmFP("dcgmDeleteMigEntity")
    request = dcgm_structs.c_dcgmDeleteMigEntity_v1()
    request.version = dcgm_structs.c_dcgmDeleteMigEntity_version1
    request.entityGroupId = entityGroupId
    request.entityId = entityId
    request.flags = flags
    dcgm_structs._dcgmCheckReturn(fn(dcgm_handle, byref(request)))
225
+
226
+
227
@ensure_byte_strings()
def dcgmGroupCreate(dcgm_handle, type, groupName):
    """
    Call ``dcgmGroupCreate`` with the given group type and name.

    ``groupName`` arrives as bytes when the caller passed a ``str``. Returns
    the ``c_void_p`` group id filled in by the library, after the status has
    been checked with ``dcgm_structs._dcgmCheckReturn``.
    """
    new_group_id = c_void_p()
    status = dcgmFP("dcgmGroupCreate")(dcgm_handle, type, groupName,
                                       byref(new_group_id))
    dcgm_structs._dcgmCheckReturn(status)
    return new_group_id
234
+
235
+
236
@ensure_byte_strings()
def dcgmGroupDestroy(dcgm_handle, group_id):
    """
    Call ``dcgmGroupDestroy`` for the given group.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupDestroy")(dcgm_handle, group_id)
    dcgm_structs._dcgmCheckReturn(status)
    return status
242
+
243
+
244
@ensure_byte_strings()
def dcgmGroupAddDevice(dcgm_handle, group_id, gpu_id):
    """
    Call ``dcgmGroupAddDevice`` to add ``gpu_id`` to ``group_id``.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupAddDevice")(dcgm_handle, group_id, gpu_id)
    dcgm_structs._dcgmCheckReturn(status)
    return status
250
+
251
+
252
@ensure_byte_strings()
def dcgmGroupAddEntity(dcgm_handle, group_id, entityGroupId, entityId):
    """
    Call ``dcgmGroupAddEntity`` to add an entity to ``group_id``.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupAddEntity")(dcgm_handle, group_id,
                                          entityGroupId, entityId)
    dcgm_structs._dcgmCheckReturn(status)
    return status
258
+
259
+
260
@ensure_byte_strings()
def dcgmGroupRemoveDevice(dcgm_handle, group_id, gpu_id):
    """
    Call ``dcgmGroupRemoveDevice`` to remove ``gpu_id`` from ``group_id``.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupRemoveDevice")(dcgm_handle, group_id, gpu_id)
    dcgm_structs._dcgmCheckReturn(status)
    return status
266
+
267
+
268
@ensure_byte_strings()
def dcgmGroupRemoveEntity(dcgm_handle, group_id, entityGroupId, entityId):
    """
    Call ``dcgmGroupRemoveEntity`` to remove an entity from ``group_id``.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupRemoveEntity")(dcgm_handle, group_id,
                                             entityGroupId, entityId)
    dcgm_structs._dcgmCheckReturn(status)
    return status
274
+
275
+
276
@ensure_byte_strings()
def dcgmGroupGetInfo(dcgm_handle,
                     group_id,
                     version=dcgm_structs.c_dcgmGroupInfo_version2):
    """
    Call ``dcgmGroupGetInfo`` for one group.

    Only ``c_dcgmGroupInfo_version2`` is supported; any other ``version`` is
    reported through ``_dcgmCheckReturn(DCGM_ST_VER_MISMATCH)``. Returns the
    populated ``c_dcgmGroupInfo_v2`` structure.
    """
    fn = dcgmFP("dcgmGroupGetInfo")

    # support the old version of the request since the host engine does
    if version != dcgm_structs.c_dcgmGroupInfo_version2:
        dcgm_structs._dcgmCheckReturn(dcgm_structs.DCGM_ST_VER_MISMATCH)
    else:
        device_values = dcgm_structs.c_dcgmGroupInfo_v2()
        device_values.version = dcgm_structs.c_dcgmGroupInfo_version2

    status = fn(dcgm_handle, group_id, byref(device_values))
    dcgm_structs._dcgmCheckReturn(status)
    return device_values
292
+
293
+
294
@ensure_byte_strings()
def dcgmGroupGetAllIds(dcgmHandle):
    """
    Call ``dcgmGroupGetAllIds`` and return the reported group ids.

    A buffer of ``DCGM_MAX_NUM_GROUPS`` ``c_void_p`` slots is handed to the
    library; the first ``count`` entries (as reported by the call) are
    returned as a Python list.
    """
    fn = dcgmFP("dcgmGroupGetAllIds")
    reported = c_uint()
    id_buffer = (c_void_p * dcgm_structs.DCGM_MAX_NUM_GROUPS)()
    status = fn(dcgmHandle, id_buffer, byref(reported))
    dcgm_structs._dcgmCheckReturn(status)
    return list(id_buffer[:int(reported.value)])
303
+
304
+
305
@ensure_byte_strings()
def dcgmFieldGroupCreate(dcgm_handle, fieldIds, fieldGroupName):
    """
    Call ``dcgmFieldGroupCreate`` for the given field ids and group name.

    ``fieldIds`` is copied into a ``c_uint16`` array sized to its length.
    Returns the ``c_void_p`` field group id filled in by the library, after
    the status has been checked with ``dcgm_structs._dcgmCheckReturn``.
    """
    new_field_group_id = c_void_p()
    num_fields = len(fieldIds)
    field_count = c_int32(num_fields)
    field_id_array = (c_uint16 * num_fields)(*fieldIds)
    status = dcgmFP("dcgmFieldGroupCreate")(dcgm_handle, field_count,
                                            byref(field_id_array),
                                            fieldGroupName,
                                            byref(new_field_group_id))
    dcgm_structs._dcgmCheckReturn(status)
    return new_field_group_id
315
+
316
+
317
@ensure_byte_strings()
def dcgmFieldGroupDestroy(dcgm_handle, fieldGroupId):
    """
    Call ``dcgmFieldGroupDestroy`` for the given field group.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmFieldGroupDestroy")(dcgm_handle, fieldGroupId)
    dcgm_structs._dcgmCheckReturn(status)
    return status
323
+
324
+
325
@ensure_byte_strings()
def dcgmFieldGroupGetInfo(dcgm_handle, fieldGroupId):
    """
    Call ``dcgmFieldGroupGetInfo`` for one field group.

    The request structure carries ``fieldGroupId`` in; the same
    ``c_dcgmFieldGroupInfo_v1`` structure is returned after the call.
    """
    group_info = dcgm_structs.c_dcgmFieldGroupInfo_v1()
    group_info.version = dcgm_structs.dcgmFieldGroupInfo_version1
    group_info.fieldGroupId = fieldGroupId
    status = dcgmFP("dcgmFieldGroupGetInfo")(dcgm_handle, byref(group_info))
    dcgm_structs._dcgmCheckReturn(status)
    return group_info
334
+
335
+
336
@ensure_byte_strings()
def dcgmFieldGroupGetAll(dcgm_handle):
    """
    Call ``dcgmFieldGroupGetAll`` and return the populated
    ``c_dcgmAllFieldGroup_v1`` structure.
    """
    all_groups = dcgm_structs.c_dcgmAllFieldGroup_v1()
    all_groups.version = dcgm_structs.dcgmAllFieldGroup_version1
    status = dcgmFP("dcgmFieldGroupGetAll")(dcgm_handle, byref(all_groups))
    dcgm_structs._dcgmCheckReturn(status)
    return all_groups
344
+
345
+
346
@ensure_byte_strings()
def dcgmStatusCreate():
    """
    Call ``dcgmStatusCreate`` and return the ``c_void_p`` status handle that
    the library fills in.
    """
    new_status_handle = c_void_p()
    status = dcgmFP("dcgmStatusCreate")(byref(new_status_handle))
    dcgm_structs._dcgmCheckReturn(status)
    return new_status_handle
353
+
354
+
355
@ensure_byte_strings()
def dcgmStatusDestroy(status_handle):
    """
    Call ``dcgmStatusDestroy`` for the given status handle.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmStatusDestroy")(status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
361
+
362
+
363
@ensure_byte_strings()
def dcgmStatusGetCount(status_handle):
    """
    Call ``dcgmStatusGetCount`` and return the count it reports as a plain
    Python int.
    """
    reported = c_uint()
    status = dcgmFP("dcgmStatusGetCount")(status_handle, byref(reported))
    dcgm_structs._dcgmCheckReturn(status)
    return reported.value
370
+
371
+
372
@ensure_byte_strings()
def dcgmStatusPopError(status_handle):
    """
    Call ``dcgmStatusPopError`` on the given status handle.

    Returns the populated ``c_dcgmErrorInfo_v1`` when the call reports
    ``DCGM_ST_OK`` and ``None`` for any other status. Unlike most wrappers in
    this module, the status is not passed to ``_dcgmCheckReturn``.
    """
    error_info = dcgm_structs.c_dcgmErrorInfo_v1()
    status = dcgmFP("dcgmStatusPopError")(status_handle, byref(error_info))
    return error_info if status == dcgm_structs.DCGM_ST_OK else None
381
+
382
+
383
@ensure_byte_strings()
def dcgmStatusClear(status_handle):
    """
    Call ``dcgmStatusClear`` for the given status handle.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmStatusClear")(status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
389
+
390
+
391
@ensure_byte_strings()
def dcgmConfigSet(dcgm_handle, group_id, configToSet, status_handle):
    """
    Call ``dcgmConfigSet`` for the given group.

    ``configToSet.version`` is overwritten with ``dcgmDeviceConfig_version1``
    before the call, so the caller's structure is mutated. Returns the status
    code after it has been checked with ``dcgm_structs._dcgmCheckReturn``.
    """
    fn = dcgmFP("dcgmConfigSet")
    configToSet.version = dcgm_structs.dcgmDeviceConfig_version1
    status = fn(dcgm_handle, group_id, byref(configToSet), status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
398
+
399
+
400
@ensure_byte_strings()
def dcgmConfigGet(dcgm_handle, group_id, reqCfgType, count, status_handle):
    """
    Call ``dcgmConfigGet`` and return ``count`` device configurations.

    An array of ``count`` ``c_dcgmDeviceConfig_v1`` structures is allocated,
    each stamped with ``dcgmDeviceConfig_version1``, and handed to the
    library. The array contents are returned as a Python list.
    """
    fn = dcgmFP("dcgmConfigGet")

    configs = (count * dcgm_structs.c_dcgmDeviceConfig_v1)()

    # Every element must carry the struct version before the call.
    for slot in range(count):
        configs[slot].version = dcgm_structs.dcgmDeviceConfig_version1

    status = fn(dcgm_handle, group_id, reqCfgType, count, configs,
                status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return list(configs[:count])
414
+
415
+
416
@ensure_byte_strings()
def dcgmConfigEnforce(dcgm_handle, group_id, status_handle):
    """
    Call ``dcgmConfigEnforce`` for the given group.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmConfigEnforce")(dcgm_handle, group_id, status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
422
+
423
+
424
+ # This method is used to tell the cache manager to update all fields
425
@ensure_byte_strings()
def dcgmUpdateAllFields(dcgm_handle, waitForUpdate):
    """
    Tell the cache manager to update all fields via ``dcgmUpdateAllFields``.

    ``waitForUpdate`` is passed to the library as a ``c_int``. Returns the
    status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmUpdateAllFields")(dcgm_handle, c_int(waitForUpdate))
    dcgm_structs._dcgmCheckReturn(status)
    return status
431
+
432
+
433
+ # This method is used to get the policy information
434
@ensure_byte_strings()
def dcgmPolicyGet(dcgm_handle, group_id, count, status_handle):
    """
    Get policy information via ``dcgmPolicyGet``.

    An array of ``count`` ``c_dcgmPolicy_v1`` structures is allocated, each
    stamped with ``dcgmPolicy_version1``, and handed to the library. The first
    ``count`` elements are returned.
    """
    fn = dcgmFP("dcgmPolicyGet")
    policies = (count * dcgm_structs.c_dcgmPolicy_v1)()

    # Every element must carry the struct version before the call.
    for slot in range(count):
        policies[slot].version = dcgm_structs.dcgmPolicy_version1

    status = fn(dcgm_handle, group_id, count, policies, status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return policies[:count]
447
+
448
+
449
+ # This method is used to set the policy information
450
@ensure_byte_strings()
def dcgmPolicySet(dcgm_handle, group_id, policy, status_handle):
    """
    Set policy information via ``dcgmPolicySet``.

    ``policy`` is passed by reference. Returns the status code after it has
    been checked with ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmPolicySet")(dcgm_handle, group_id, byref(policy),
                                     status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
456
+
457
+
458
# First parameter below is the return type
# Callback prototype returning c_int32 and receiving
# (c_uint32, POINTER(c_dcgmFieldValue_v1), c_int32, c_void_p).
# NOTE(review): presumably (gpuId, values, numValues, userData) - confirm
# against the DCGM headers.
dcgmFieldValueEnumeration_f = CFUNCTYPE(
    c_int32, c_uint32, POINTER(dcgm_structs.c_dcgmFieldValue_v1), c_int32,
    c_void_p)
# Same shape as above with one extra leading c_uint32 argument.
# NOTE(review): presumably (entityGroupId, entityId, ...) - confirm against
# the DCGM headers.
dcgmFieldValueEntityEnumeration_f = CFUNCTYPE(
    c_int32, c_uint32, c_uint32, POINTER(dcgm_structs.c_dcgmFieldValue_v1),
    c_int32, c_void_p)
465
+
466
+
467
@ensure_byte_strings()
def dcgmGetValuesSince(dcgm_handle, groupId, fieldGroupId, sinceTimestamp,
                       enumCB, userData):
    """
    Call ``dcgmGetValuesSince`` with the given callback.

    ``sinceTimestamp`` is passed as a ``c_int64`` and ``userData`` is wrapped
    in ``py_object`` for the callback. Returns the next "since" timestamp the
    library writes back, as a plain Python int.
    """
    fn = dcgmFP("dcgmGetValuesSince")
    next_since = c_int64()
    status = fn(dcgm_handle, groupId, fieldGroupId, c_int64(sinceTimestamp),
                byref(next_since), enumCB, py_object(userData))
    dcgm_structs._dcgmCheckReturn(status)
    return next_since.value
476
+
477
+
478
@ensure_byte_strings()
def dcgmGetValuesSince_v2(dcgm_handle, groupId, fieldGroupId, sinceTimestamp,
                          enumCB, userData):
    """
    Call ``dcgmGetValuesSince_v2`` with the given callback.

    ``sinceTimestamp`` is passed as a ``c_int64`` and ``userData`` is wrapped
    in ``py_object`` for the callback. Returns the next "since" timestamp the
    library writes back, as a plain Python int.
    """
    fn = dcgmFP("dcgmGetValuesSince_v2")
    next_since = c_int64()
    status = fn(dcgm_handle, groupId, fieldGroupId, c_int64(sinceTimestamp),
                byref(next_since), enumCB, py_object(userData))
    dcgm_structs._dcgmCheckReturn(status)
    return next_since.value
487
+
488
+
489
@ensure_byte_strings()
def dcgmGetLatestValues(dcgm_handle, groupId, fieldGroupId, enumCB, userData):
    """
    Call ``dcgmGetLatestValues`` with the given callback.

    ``userData`` is wrapped in ``py_object`` before being handed to the
    library. Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGetLatestValues")(dcgm_handle, groupId, fieldGroupId,
                                           enumCB, py_object(userData))
    dcgm_structs._dcgmCheckReturn(status)
    return status
495
+
496
+
497
@ensure_byte_strings()
def dcgmGetLatestValues_v2(dcgm_handle, groupId, fieldGroupId, enumCB,
                           userData):
    """
    Call ``dcgmGetLatestValues_v2`` with the given callback.

    ``userData`` is wrapped in ``py_object`` before being handed to the
    library. Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGetLatestValues_v2")(dcgm_handle, groupId,
                                              fieldGroupId, enumCB,
                                              py_object(userData))
    dcgm_structs._dcgmCheckReturn(status)
    return status
504
+
505
+
506
@ensure_byte_strings()
def dcgmWatchFields(dcgm_handle, groupId, fieldGroupId, updateFreq, maxKeepAge,
                    maxKeepSamples):
    """
    Call ``dcgmWatchFields`` for a group / field group pair.

    ``updateFreq`` is passed as ``c_int64``, ``maxKeepAge`` as ``c_double``
    and ``maxKeepSamples`` as ``c_int32``. Returns the status code after it
    has been checked with ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmWatchFields")(dcgm_handle, groupId, fieldGroupId,
                                       c_int64(updateFreq),
                                       c_double(maxKeepAge),
                                       c_int32(maxKeepSamples))
    dcgm_structs._dcgmCheckReturn(status)
    return status
514
+
515
+
516
@ensure_byte_strings()
def dcgmUnwatchFields(dcgm_handle, groupId, fieldGroupId):
    """
    Call ``dcgmUnwatchFields`` for a group / field group pair.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmUnwatchFields")(dcgm_handle, groupId, fieldGroupId)
    dcgm_structs._dcgmCheckReturn(status)
    return status
522
+
523
+
524
@ensure_byte_strings()
def dcgmHealthSet(dcgm_handle, groupId, systems):
    """
    Call ``dcgmHealthSet`` with the given ``systems`` value for a group.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmHealthSet")(dcgm_handle, groupId, systems)
    dcgm_structs._dcgmCheckReturn(status)
    return status
530
+
531
+
532
@ensure_byte_strings()
def dcgmHealthSet_v2(dcgm_handle, groupId, systems, updateInterval, maxKeepAge):
    """
    Call ``dcgmHealthSet_v2`` with a ``c_dcgmHealthSetParams_v2`` request built
    from the arguments.

    Returns the status code after it has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    request = dcgm_structs.c_dcgmHealthSetParams_v2()
    request.version = dcgm_structs.dcgmHealthSetParams_version2
    request.groupId = groupId
    request.systems = systems
    request.updateInterval = updateInterval
    request.maxKeepAge = maxKeepAge

    status = dcgmFP("dcgmHealthSet_v2")(dcgm_handle, byref(request))
    dcgm_structs._dcgmCheckReturn(status)
    return status
545
+
546
+
547
@ensure_byte_strings()
def dcgmHealthGet(dcgm_handle, groupId):
    """
    Call ``dcgmHealthGet`` for a group and return the ``systems`` value the
    library writes back, as a plain Python int.
    """
    systems_out = c_int32()
    status = dcgmFP("dcgmHealthGet")(dcgm_handle, groupId, byref(systems_out))
    dcgm_structs._dcgmCheckReturn(status)
    return systems_out.value
554
+
555
+
556
@ensure_byte_strings()
def dcgmHealthCheck(dcgm_handle,
                    groupId,
                    version=dcgm_structs.dcgmHealthResponse_version4):
    """
    Call ``dcgmHealthCheck`` for a group.

    Only ``dcgmHealthResponse_version4`` is supported; any other ``version``
    is reported through ``_dcgmCheckReturn(DCGM_ST_VER_MISMATCH)``. Returns
    the populated ``c_dcgmHealthResponse_v4`` structure.
    """
    supported_version = dcgm_structs.dcgmHealthResponse_version4
    if version != supported_version:
        dcgm_structs._dcgmCheckReturn(dcgm_structs.DCGM_ST_VER_MISMATCH)

    health_response = dcgm_structs.c_dcgmHealthResponse_v4()
    health_response.version = dcgm_structs.dcgmHealthResponse_version4
    status = dcgmFP("dcgmHealthCheck")(dcgm_handle, groupId,
                                       byref(health_response))
    dcgm_structs._dcgmCheckReturn(status)
    return health_response
569
+
570
+
571
@ensure_byte_strings()
def dcgmPolicyRegister(dcgm_handle, groupId, condition, beginCallback,
                       finishCallback):
    """Call the library's ``dcgmPolicyRegister`` entry point.

    All five arguments are forwarded unchanged. Returns the status code
    of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    register = dcgmFP("dcgmPolicyRegister")
    status = register(
        dcgm_handle,
        groupId,
        condition,
        beginCallback,
        finishCallback,
    )
    dcgm_structs._dcgmCheckReturn(status)
    return status
578
+
579
+
580
@ensure_byte_strings()
def dcgmPolicyUnregister(dcgm_handle, groupId, condition):
    """Call the library's ``dcgmPolicyUnregister`` entry point.

    All three arguments are forwarded unchanged. Returns the status code
    of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    status = dcgmFP("dcgmPolicyUnregister")(dcgm_handle, groupId, condition)
    dcgm_structs._dcgmCheckReturn(status)
    return status
586
+
587
+
588
@ensure_byte_strings()
def dcgmPolicyTrigger(dcgm_handle):
    """Call the library's ``dcgmPolicyTrigger`` entry point.

    Returns the status code of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    status = dcgmFP("dcgmPolicyTrigger")(dcgm_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
594
+
595
+
596
def helperDiagCheckReturn(ret, response):
    """Check a diagnostic call's status, enriching any error with system info.

    ``ret`` is passed to ``dcgm_structs._dcgmCheckReturn``. If that raises a
    ``dcgm_structs.DCGMError`` and ``response.systemError.msg`` is non-empty,
    the message is attached to the exception through ``SetAdditionalInfo``
    before it propagates. Otherwise the exception propagates untouched.

    Returns ``response`` unchanged when no exception is raised.
    """
    try:
        dcgm_structs._dcgmCheckReturn(ret)
    except dcgm_structs.DCGMError as e:
        msg = response.systemError.msg
        # A ctypes char-array field reads back as ``bytes`` on Python 3, and
        # ``b"" != ""`` is always true there. Normalise to ``str`` so that an
        # empty message is really treated as empty (and a non-empty one is not
        # rendered as "b'...'"). NOTE(review): if the struct class already
        # decodes its fields, this branch is simply a no-op.
        if isinstance(msg, bytes):
            msg = msg.decode("utf-8", errors="replace")
        if msg != "":
            # Add systemError information to the raised exception.
            e.SetAdditionalInfo("%s" % msg)
        # Bare raise re-raises the same exception object with its traceback.
        raise

    return response
611
+
612
+
613
@ensure_byte_strings()
def dcgmActionValidate_v2(dcgm_handle,
                          runDiagInfo,
                          runDiagVersion=dcgm_structs.dcgmRunDiag_version7):
    """Call the library's ``dcgmActionValidate_v2`` entry point.

    ``runDiagInfo.version`` is overwritten with ``runDiagVersion`` (the
    caller's struct is modified in place). A fresh
    ``c_dcgmDiagResponse_v8`` stamped with ``dcgmDiagResponse_version8``
    is passed by reference alongside it.

    Returns whatever ``helperDiagCheckReturn`` returns for the call's
    status code and that response struct.
    """
    diag_response = dcgm_structs.c_dcgmDiagResponse_v8()
    runDiagInfo.version = runDiagVersion
    diag_response.version = dcgm_structs.dcgmDiagResponse_version8

    status = dcgmFP("dcgmActionValidate_v2")(
        dcgm_handle, byref(runDiagInfo), byref(diag_response))
    return helperDiagCheckReturn(status, diag_response)
624
+
625
+
626
@ensure_byte_strings()
def dcgmActionValidate(dcgm_handle, group_id, validate):
    """Run ``dcgmActionValidate_v2`` for a group id and validate value.

    ``group_id`` and ``validate`` are placed into a new ``c_dcgmRunDiag_v7``
    struct (stamped with ``dcgmRunDiag_version7``), which is passed by
    reference to the library's ``dcgmActionValidate_v2`` entry point
    together with a new ``c_dcgmDiagResponse_v8``.

    Returns whatever ``helperDiagCheckReturn`` returns for the call's
    status code and that response struct.
    """
    diag_response = dcgm_structs.c_dcgmDiagResponse_v8()
    diag_response.version = dcgm_structs.dcgmDiagResponse_version8

    # Wrap the two scalar arguments in the struct the v2 entry point expects.
    run_diag = dcgm_structs.c_dcgmRunDiag_v7()
    run_diag.version = dcgm_structs.dcgmRunDiag_version7
    run_diag.validate = validate
    run_diag.groupId = group_id

    status = dcgmFP("dcgmActionValidate_v2")(
        dcgm_handle, byref(run_diag), byref(diag_response))
    return helperDiagCheckReturn(status, diag_response)
641
+
642
+
643
@ensure_byte_strings()
def dcgmRunDiagnostic(dcgm_handle, group_id, diagLevel):
    """Call the library's ``dcgmRunDiagnostic`` entry point.

    A new ``c_dcgmDiagResponse_v8`` stamped with
    ``dcgmDiagResponse_version8`` is passed by reference as the last
    argument; the other arguments are forwarded unchanged.

    Returns whatever ``helperDiagCheckReturn`` returns for the call's
    status code and that response struct.
    """
    diag_response = dcgm_structs.c_dcgmDiagResponse_v8()
    diag_response.version = dcgm_structs.dcgmDiagResponse_version8

    status = dcgmFP("dcgmRunDiagnostic")(
        dcgm_handle, group_id, diagLevel, byref(diag_response))
    return helperDiagCheckReturn(status, diag_response)
651
+
652
+
653
@ensure_byte_strings()
def dcgmWatchPidFields(dcgm_handle, groupId, updateFreq, maxKeepAge,
                       maxKeepSamples):
    """Call the library's ``dcgmWatchPidFields`` entry point.

    ``updateFreq``, ``maxKeepAge`` and ``maxKeepSamples`` are wrapped as
    ``c_int64``, ``c_double`` and ``c_int32`` before the call.

    Returns the status code of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    watch_pid_fields = dcgmFP("dcgmWatchPidFields")
    status = watch_pid_fields(
        dcgm_handle,
        groupId,
        c_int64(updateFreq),
        c_double(maxKeepAge),
        c_int32(maxKeepSamples),
    )
    dcgm_structs._dcgmCheckReturn(status)
    return status
661
+
662
+
663
@ensure_byte_strings()
def dcgmGetPidInfo(dcgm_handle, groupId, pid):
    """Call the library's ``dcgmGetPidInfo`` entry point.

    A new ``c_dcgmPidInfo_v2`` stamped with ``dcgmPidInfo_version2`` and
    carrying ``pid`` is passed to the library by reference.

    Returns that struct after the call's status code has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    get_pid_info = dcgmFP("dcgmGetPidInfo")

    pid_record = dcgm_structs.c_dcgmPidInfo_v2()
    pid_record.version = dcgm_structs.dcgmPidInfo_version2
    pid_record.pid = pid

    status = get_pid_info(dcgm_handle, groupId, byref(pid_record))
    dcgm_structs._dcgmCheckReturn(status)
    return pid_record
674
+
675
+
676
@ensure_byte_strings()
def dcgmGetDeviceTopology(dcgm_handle, gpuId):
    """Call the library's ``dcgmGetDeviceTopology`` entry point.

    A new ``c_dcgmDeviceTopology_v1`` is passed to the library by
    reference. Returns that struct after the call's status code has been
    passed to ``dcgm_structs._dcgmCheckReturn`` (defined outside this
    block and expected to raise on a failure status).
    """
    topology = dcgm_structs.c_dcgmDeviceTopology_v1()
    status = dcgmFP("dcgmGetDeviceTopology")(dcgm_handle, gpuId,
                                             byref(topology))
    dcgm_structs._dcgmCheckReturn(status)
    return topology
683
+
684
+
685
@ensure_byte_strings()
def dcgmGetGroupTopology(dcgm_handle, groupId):
    """Call the library's ``dcgmGetGroupTopology`` entry point.

    A new ``c_dcgmGroupTopology_v1`` is passed to the library by
    reference. Returns that struct after the call's status code has been
    passed to ``dcgm_structs._dcgmCheckReturn`` (defined outside this
    block and expected to raise on a failure status).
    """
    topology = dcgm_structs.c_dcgmGroupTopology_v1()
    status = dcgmFP("dcgmGetGroupTopology")(dcgm_handle, groupId,
                                            byref(topology))
    dcgm_structs._dcgmCheckReturn(status)
    return topology
692
+
693
+
694
@ensure_byte_strings()
def dcgmWatchJobFields(dcgm_handle, groupId, updateFreq, maxKeepAge,
                       maxKeepSamples):
    """Call the library's ``dcgmWatchJobFields`` entry point.

    ``updateFreq``, ``maxKeepAge`` and ``maxKeepSamples`` are wrapped as
    ``c_int64``, ``c_double`` and ``c_int32`` before the call.

    Returns the status code of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    watch_job_fields = dcgmFP("dcgmWatchJobFields")
    status = watch_job_fields(
        dcgm_handle,
        groupId,
        c_int64(updateFreq),
        c_double(maxKeepAge),
        c_int32(maxKeepSamples),
    )
    dcgm_structs._dcgmCheckReturn(status)
    return status
702
+
703
+
704
@ensure_byte_strings()
def dcgmJobStartStats(dcgm_handle, groupId, jobid):
    """Call the library's ``dcgmJobStartStats`` entry point.

    All three arguments are forwarded unchanged. Returns the status code
    of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    status = dcgmFP("dcgmJobStartStats")(dcgm_handle, groupId, jobid)
    dcgm_structs._dcgmCheckReturn(status)
    return status
710
+
711
+
712
@ensure_byte_strings()
def dcgmJobStopStats(dcgm_handle, jobid):
    """Call the library's ``dcgmJobStopStats`` entry point.

    Both arguments are forwarded unchanged. Returns the status code of
    the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    status = dcgmFP("dcgmJobStopStats")(dcgm_handle, jobid)
    dcgm_structs._dcgmCheckReturn(status)
    return status
718
+
719
+
720
@ensure_byte_strings()
def dcgmJobGetStats(dcgm_handle, jobid):
    """Call the library's ``dcgmJobGetStats`` entry point.

    A new ``c_dcgmJobInfo_v3`` stamped with ``dcgmJobInfo_version3`` is
    passed to the library by reference. Returns that struct after the
    call's status code has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    get_stats = dcgmFP("dcgmJobGetStats")

    job_record = dcgm_structs.c_dcgmJobInfo_v3()
    job_record.version = dcgm_structs.dcgmJobInfo_version3

    status = get_stats(dcgm_handle, jobid, byref(job_record))
    dcgm_structs._dcgmCheckReturn(status)
    return job_record
730
+
731
+
732
@ensure_byte_strings()
def dcgmJobRemove(dcgm_handle, jobid):
    """Call the library's ``dcgmJobRemove`` entry point.

    Both arguments are forwarded unchanged. Returns the status code of
    the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    status = dcgmFP("dcgmJobRemove")(dcgm_handle, jobid)
    dcgm_structs._dcgmCheckReturn(status)
    return status
738
+
739
+
740
@ensure_byte_strings()
def dcgmJobRemoveAll(dcgm_handle):
    """Call the library's ``dcgmJobRemoveAll`` entry point.

    Returns the status code of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    status = dcgmFP("dcgmJobRemoveAll")(dcgm_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
746
+
747
+
748
@ensure_byte_strings()
def dcgmIntrospectGetHostengineMemoryUsage(dcgm_handle, waitIfNoData=True):
    """Call the library's ``dcgmIntrospectGetHostengineMemoryUsage`` entry point.

    A new ``c_dcgmIntrospectMemory_v1`` stamped with
    ``dcgmIntrospectMemory_version1`` is passed by reference, followed by
    ``waitIfNoData`` forwarded unchanged.

    Returns that struct after the call's status code has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    get_memory_usage = dcgmFP("dcgmIntrospectGetHostengineMemoryUsage")

    memory_record = dcgm_structs.c_dcgmIntrospectMemory_v1()
    memory_record.version = dcgm_structs.dcgmIntrospectMemory_version1

    status = get_memory_usage(dcgm_handle, byref(memory_record), waitIfNoData)
    dcgm_structs._dcgmCheckReturn(status)
    return memory_record
758
+
759
+
760
@ensure_byte_strings()
def dcgmIntrospectGetHostengineCpuUtilization(dcgm_handle, waitIfNoData=True):
    """Call the library's ``dcgmIntrospectGetHostengineCpuUtilization`` entry point.

    A new ``c_dcgmIntrospectCpuUtil_v1`` stamped with
    ``dcgmIntrospectCpuUtil_version1`` is passed by reference, followed by
    ``waitIfNoData`` forwarded unchanged.

    Returns that struct after the call's status code has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    get_cpu_utilization = dcgmFP("dcgmIntrospectGetHostengineCpuUtilization")

    cpu_record = dcgm_structs.c_dcgmIntrospectCpuUtil_v1()
    cpu_record.version = dcgm_structs.dcgmIntrospectCpuUtil_version1

    status = get_cpu_utilization(dcgm_handle, byref(cpu_record), waitIfNoData)
    dcgm_structs._dcgmCheckReturn(status)
    return cpu_record
770
+
771
+
772
@ensure_byte_strings()
def dcgmEntityGetLatestValues(dcgmHandle, entityGroup, entityId, fieldIds):
    """Call the library's ``dcgmEntityGetLatestValues`` entry point.

    ``fieldIds`` is copied into a ``c_uint16`` array, and an equally long
    array of ``c_dcgmFieldValue_v1`` is allocated as the output buffer.
    ``entityGroup`` and the field count are wrapped as ``c_uint``;
    ``entityId`` is wrapped as ``dcgm_fields.c_dcgm_field_eid_t``.

    Returns the output array after the call's status code has been passed
    to ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    get_latest = dcgmFP("dcgmEntityGetLatestValues")

    field_count = len(fieldIds)
    value_buffer = (dcgm_structs.c_dcgmFieldValue_v1 * field_count)()
    field_id_array = (c_uint16 * field_count)(*fieldIds)

    status = get_latest(
        dcgmHandle,
        c_uint(entityGroup),
        dcgm_fields.c_dcgm_field_eid_t(entityId),
        field_id_array,
        c_uint(field_count),
        value_buffer,
    )
    dcgm_structs._dcgmCheckReturn(status)
    return value_buffer
782
+
783
+
784
@ensure_byte_strings()
def dcgmEntitiesGetLatestValues(dcgmHandle, entities, fieldIds, flags):
    """Call the library's ``dcgmEntitiesGetLatestValues`` entry point.

    ``entities`` is copied into a ``c_dcgmGroupEntityPair_t`` array and
    ``fieldIds`` into a ``c_uint16`` array. The output buffer holds
    ``len(fieldIds) * len(entities)`` ``c_dcgmFieldValue_v2`` elements.
    ``flags`` is forwarded unchanged.

    Returns the output array after the call's status code has been passed
    to ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    get_latest = dcgmFP("dcgmEntitiesGetLatestValues")

    # One output slot per (entity, field) combination.
    slot_count = len(fieldIds) * len(entities)
    value_buffer = (dcgm_structs.c_dcgmFieldValue_v2 * slot_count)()

    entity_array_type = dcgm_structs.c_dcgmGroupEntityPair_t * len(entities)
    entity_array = entity_array_type(*entities)
    field_id_array = (c_uint16 * len(fieldIds))(*fieldIds)

    status = get_latest(
        dcgmHandle,
        entity_array,
        c_uint(len(entities)),
        field_id_array,
        c_uint(len(fieldIds)),
        flags,
        value_buffer,
    )
    dcgm_structs._dcgmCheckReturn(status)
    return value_buffer
796
+
797
+
798
@ensure_byte_strings()
def dcgmSelectGpusByTopology(dcgmHandle, inputGpuIds, numGpus, hintFlags):
    """Call the library's ``dcgmSelectGpusByTopology`` entry point.

    ``inputGpuIds`` and ``hintFlags`` are wrapped as ``c_uint64`` and
    ``numGpus`` as ``c_uint32``. The output argument is passed by
    reference.

    Returns the ctypes output object itself (callers read ``.value``),
    after the call's status code has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    select_gpus = dcgmFP("dcgmSelectGpusByTopology")
    # The output is declared unsigned so it matches the unsigned 64-bit
    # input mask; a signed c_int64 would report a mask with the top bit set
    # as a negative number.
    # NOTE(review): assumes the C prototype takes ``uint64_t *outputGpuIds``
    # -- confirm against dcgm_agent.h.
    outputGpuIds = c_uint64()
    ret = select_gpus(dcgmHandle, c_uint64(inputGpuIds), c_uint32(numGpus),
                      byref(outputGpuIds), c_uint64(hintFlags))
    dcgm_structs._dcgmCheckReturn(ret)
    return outputGpuIds
806
+
807
+
808
@ensure_byte_strings()
def dcgmGetFieldSummary(dcgmHandle, fieldId, entityGroupType, entityId,
                        summaryMask, startTime, endTime):
    """Call the library's ``dcgmGetFieldSummary`` entry point.

    The arguments are packed into a ``c_dcgmFieldSummaryRequest_v1``
    struct stamped with ``dcgmFieldSummaryRequest_version1``
    (``summaryMask`` is stored in ``summaryTypeMask``), which is passed to
    the library by reference.

    Returns that same request struct after the call's status code has
    been passed to ``dcgm_structs._dcgmCheckReturn`` (defined outside this
    block and expected to raise on a failure status).
    """
    get_summary = dcgmFP("dcgmGetFieldSummary")

    summary_request = dcgm_structs.c_dcgmFieldSummaryRequest_v1()
    summary_request.version = dcgm_structs.dcgmFieldSummaryRequest_version1
    summary_request.fieldId = fieldId
    summary_request.entityGroupType = entityGroupType
    summary_request.entityId = entityId
    summary_request.summaryTypeMask = summaryMask
    summary_request.startTime = startTime
    summary_request.endTime = endTime

    status = get_summary(dcgmHandle, byref(summary_request))
    dcgm_structs._dcgmCheckReturn(status)
    return summary_request
823
+
824
+
825
@ensure_byte_strings()
def dcgmModuleDenylist(dcgmHandle, moduleId):
    """Call the library's ``dcgmModuleDenylist`` entry point.

    ``moduleId`` is wrapped as ``c_uint32`` before the call. Returns the
    status code of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    status = dcgmFP("dcgmModuleDenylist")(dcgmHandle, c_uint32(moduleId))
    dcgm_structs._dcgmCheckReturn(status)
    return status
831
+
832
+
833
@ensure_byte_strings()
def dcgmModuleGetStatuses(dcgmHandle):
    """Call the library's ``dcgmModuleGetStatuses`` entry point.

    A new ``c_dcgmModuleGetStatuses_v1`` stamped with
    ``dcgmModuleGetStatuses_version1`` is passed to the library by
    reference. Returns that struct after the call's status code has been
    passed to ``dcgm_structs._dcgmCheckReturn`` (defined outside this
    block and expected to raise on a failure status).
    """
    statuses = dcgm_structs.c_dcgmModuleGetStatuses_v1()
    statuses.version = dcgm_structs.dcgmModuleGetStatuses_version1

    status = dcgmFP("dcgmModuleGetStatuses")(dcgmHandle, byref(statuses))
    dcgm_structs._dcgmCheckReturn(status)
    return statuses
841
+
842
+
843
@ensure_byte_strings()
def dcgmProfGetSupportedMetricGroups(dcgmHandle, gpuId):
    """Call the library's ``dcgmProfGetSupportedMetricGroups`` entry point.

    A new ``c_dcgmProfGetMetricGroups_v3`` stamped with
    ``dcgmProfGetMetricGroups_version3`` and carrying ``gpuId`` is passed
    to the library by reference. Returns that struct after the call's
    status code has been passed to ``dcgm_structs._dcgmCheckReturn``
    (defined outside this block and expected to raise on a failure
    status).
    """
    metric_groups = dcgm_structs.c_dcgmProfGetMetricGroups_v3()
    metric_groups.version = dcgm_structs.dcgmProfGetMetricGroups_version3
    metric_groups.gpuId = gpuId

    status = dcgmFP("dcgmProfGetSupportedMetricGroups")(dcgmHandle,
                                                        byref(metric_groups))
    dcgm_structs._dcgmCheckReturn(status)
    return metric_groups
852
+
853
+
854
@ensure_byte_strings()
def dcgmProfPause(dcgmHandle):
    """Call the library's ``dcgmProfPause`` entry point.

    Returns the status code of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    status = dcgmFP("dcgmProfPause")(dcgmHandle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
860
+
861
+
862
@ensure_byte_strings()
def dcgmProfResume(dcgmHandle):
    """Call the library's ``dcgmProfResume`` entry point.

    Returns the status code of the call after it has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    status = dcgmFP("dcgmProfResume")(dcgmHandle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
868
+
869
+
870
@ensure_byte_strings()
def dcgmVersionInfo():
    """Call the library's ``dcgmVersionInfo`` entry point.

    A new ``c_dcgmVersionInfo_v2`` stamped with ``dcgmVersionInfo_version2``
    is passed to the library by reference; no handle is involved. Returns
    that struct after the call's status code has been passed to
    ``dcgm_structs._dcgmCheckReturn`` (defined outside this block and
    expected to raise on a failure status).
    """
    version_record = dcgm_structs.c_dcgmVersionInfo_v2()
    version_record.version = dcgm_structs.dcgmVersionInfo_version2

    status = dcgmFP("dcgmVersionInfo")(byref(version_record))
    dcgm_structs._dcgmCheckReturn(status)
    return version_record
878
+
879
+
880
@ensure_byte_strings()
def dcgmHostengineIsHealthy(dcgmHandle):
    """Call the library's ``dcgmHostengineIsHealthy`` entry point.

    A new ``c_dcgmHostengineHealth_v1`` stamped with
    ``dcgmHostengineHealth_version1`` is passed to the library by
    reference. Returns that struct after the call's status code has been
    passed to ``dcgm_structs._dcgmCheckReturn`` (defined outside this
    block and expected to raise on a failure status).
    """
    health_record = dcgm_structs.c_dcgmHostengineHealth_v1()
    health_record.version = dcgm_structs.dcgmHostengineHealth_version1

    status = dcgmFP("dcgmHostengineIsHealthy")(dcgmHandle,
                                               byref(health_record))
    dcgm_structs._dcgmCheckReturn(status)
    return health_record