triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,887 @@
|
|
|
1
|
+
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
##
|
|
15
|
+
# Python bindings for the internal API of DCGM library (dcgm_agent.h)
|
|
16
|
+
##
|
|
17
|
+
|
|
18
|
+
import model_analyzer.monitor.dcgm.dcgm_structs as dcgm_structs
|
|
19
|
+
import model_analyzer.monitor.dcgm.dcgm_fields as dcgm_fields
|
|
20
|
+
from ctypes import *
|
|
21
|
+
import functools
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def ensure_byte_strings():
    """
    Build a decorator that encodes ``str`` arguments to UTF-8 ``bytes``.

    C APIs reached through ctypes must not be called with unicode strings,
    so every positional or keyword argument that is a ``str`` is converted
    before the wrapped function runs. Arguments of any other type are passed
    through untouched, and the wrapped function's return value is handed
    back unmodified (results are not decoded).

    Returns:
        A decorator; apply it as ``@ensure_byte_strings()``.
    """

    def _encode_if_text(value):
        # Only str values are encoded; bytes, ints, ctypes objects, None, ...
        # must reach the wrapped function exactly as the caller supplied them.
        if isinstance(value, str):
            return bytes(value, 'utf-8')
        return value

    def decorator(fn):

        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            encoded_args = tuple(_encode_if_text(arg) for arg in args)
            encoded_kwargs = {
                key: _encode_if_text(val) for key, val in kwargs.items()
            }
            return fn(*encoded_args, **encoded_kwargs)

        return wrapper

    return decorator
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Provides access to functions from dcgm_agent_internal
|
|
64
|
+
# Short alias for the symbol-lookup helper in dcgm_structs: each wrapper in
# this module calls dcgmFP("<name>") to obtain the native function object
# before invoking it.
dcgmFP = dcgm_structs._dcgmGetFunctionPointer
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# This method is used to initialize DCGM
|
|
68
|
+
@ensure_byte_strings()
def dcgmInit():
    """
    Initialize DCGM through the native ``dcgmInit`` entry point.

    A fresh ``c_void_p`` is passed to the native call by reference; it is
    not returned to the caller.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn`` (which is defined outside this
        block and presumably raises on a failing status).
    """
    handle_slot = c_void_p()
    status = dcgmFP("dcgmInit")(byref(handle_slot))
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# This method is used to shutdown DCGM Engine
|
|
78
|
+
@ensure_byte_strings()
def dcgmShutdown():
    """
    Shut down the DCGM engine through the native ``dcgmShutdown`` call.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmShutdown")()
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@ensure_byte_strings()
def dcgmStartEmbedded(opMode):
    """
    Call the native ``dcgmStartEmbedded`` with the requested operation mode.

    Args:
        opMode: Value forwarded unchanged as the first native argument.

    Returns:
        The ``c_void_p`` that was passed by reference to the native call
        (the DCGM handle), once the status has been checked with
        ``dcgm_structs._dcgmCheckReturn``.
    """
    embedded_handle = c_void_p()
    status = dcgmFP("dcgmStartEmbedded")(opMode, byref(embedded_handle))
    dcgm_structs._dcgmCheckReturn(status)
    return embedded_handle
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@ensure_byte_strings()
def dcgmStopEmbedded(dcgm_handle):
    """
    Call the native ``dcgmStopEmbedded`` for the given handle.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmStopEmbedded")(dcgm_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@ensure_byte_strings()
def dcgmConnect(ip_address):
    """
    Call the native ``dcgmConnect`` for the given address.

    Args:
        ip_address: Address forwarded to the native call. A ``str`` value
            is encoded to UTF-8 bytes by the ``ensure_byte_strings``
            decorator before this body runs.

    Returns:
        The ``c_void_p`` that was passed by reference to the native call
        (the connection handle), once the status has been checked with
        ``dcgm_structs._dcgmCheckReturn``.
    """
    connection = c_void_p()
    status = dcgmFP("dcgmConnect")(ip_address, byref(connection))
    dcgm_structs._dcgmCheckReturn(status)
    return connection
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@ensure_byte_strings()
def dcgmConnect_v2(ip_address,
                   connectParams,
                   version=dcgm_structs.c_dcgmConnectV2Params_version):
    """
    Call the native ``dcgmConnect_v2`` with explicit connection parameters.

    Args:
        ip_address: Address forwarded to the native call. A ``str`` value
            is encoded to UTF-8 bytes by the ``ensure_byte_strings``
            decorator before this body runs.
        connectParams: Parameter structure passed by reference. Its
            ``version`` attribute is overwritten in place with ``version``.
        version: Value stored into ``connectParams.version`` before the
            call.

    Returns:
        The ``c_void_p`` that was passed by reference to the native call
        (the connection handle), once the status has been checked with
        ``dcgm_structs._dcgmCheckReturn``.
    """
    # The caller's structure is stamped in place before it is handed over.
    connectParams.version = version
    connection = c_void_p()
    status = dcgmFP("dcgmConnect_v2")(ip_address, byref(connectParams),
                                      byref(connection))
    dcgm_structs._dcgmCheckReturn(status)
    return connection
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@ensure_byte_strings()
def dcgmDisconnect(dcgm_handle):
    """
    Call the native ``dcgmDisconnect`` for the given handle.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmDisconnect")(dcgm_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@ensure_byte_strings()
def dcgmGetAllSupportedDevices(dcgm_handle):
    """
    Fetch GPU ids through the native ``dcgmGetAllSupportedDevices`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.

    Returns:
        A Python list holding the leading entries of the id buffer, as many
        as the native call reported through its count out-parameter.
    """
    reported = c_uint()
    # Fixed-size output buffer; only the reported prefix is returned.
    id_buffer = (c_uint * dcgm_structs.DCGM_MAX_NUM_DEVICES)()
    status = dcgmFP("dcgmGetAllSupportedDevices")(dcgm_handle, id_buffer,
                                                  byref(reported))
    dcgm_structs._dcgmCheckReturn(status)
    return list(id_buffer[:int(reported.value)])
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@ensure_byte_strings()
def dcgmGetAllDevices(dcgm_handle):
    """
    Fetch GPU ids through the native ``dcgmGetAllDevices`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.

    Returns:
        A Python list holding the leading entries of the id buffer, as many
        as the native call reported through its count out-parameter.
    """
    reported = c_uint()
    # Fixed-size output buffer; only the reported prefix is returned.
    id_buffer = (c_uint * dcgm_structs.DCGM_MAX_NUM_DEVICES)()
    status = dcgmFP("dcgmGetAllDevices")(dcgm_handle, id_buffer,
                                         byref(reported))
    dcgm_structs._dcgmCheckReturn(status)
    return list(id_buffer[:int(reported.value)])
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@ensure_byte_strings()
def dcgmGetDeviceAttributes(dcgm_handle,
                            gpuId,
                            version=dcgm_structs.dcgmDeviceAttributes_version3):
    """
    Read a GPU's attributes through the native ``dcgmGetDeviceAttributes``.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        gpuId: GPU id; wrapped in ``c_int`` before being passed on.
        version: Structure version to request. Only
            ``dcgm_structs.dcgmDeviceAttributes_version3`` is handled here.

    Returns:
        The ``c_dcgmDeviceAttributes_v3`` structure that was passed by
        reference to the native call.
    """
    native_call = dcgmFP("dcgmGetDeviceAttributes")

    if version != dcgm_structs.dcgmDeviceAttributes_version3:
        # Any other version is reported as a mismatch; _dcgmCheckReturn is
        # presumably expected to raise here, since no structure exists
        # for the call below otherwise.
        dcgm_structs._dcgmCheckReturn(dcgm_structs.DCGM_ST_VER_MISMATCH)
    else:
        device_values = dcgm_structs.c_dcgmDeviceAttributes_v3()
        device_values.version = dcgm_structs.dcgmDeviceAttributes_version3

    status = native_call(dcgm_handle, c_int(gpuId), byref(device_values))
    dcgm_structs._dcgmCheckReturn(status)
    return device_values
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@ensure_byte_strings()
def dcgmGetEntityGroupEntities(dcgm_handle, entityGroup, flags):
    """
    List entity ids through the native ``dcgmGetEntityGroupEntities`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        entityGroup: Entity group selector, forwarded unchanged.
        flags: Flags value, forwarded unchanged as the last argument.

    Returns:
        The leading slice of the id buffer (a Python list), whose length is
        the value left in the count variable after the call.
    """
    max_entities = dcgm_structs.DCGM_GROUP_MAX_ENTITIES
    # The count is seeded with the buffer capacity before the call;
    # presumably the native side overwrites it with the number written.
    entity_total = c_int32(max_entities)
    id_buffer = (c_uint32 * max_entities)()
    status = dcgmFP("dcgmGetEntityGroupEntities")(dcgm_handle, entityGroup,
                                                  id_buffer,
                                                  byref(entity_total), flags)
    dcgm_structs._dcgmCheckReturn(status)
    return id_buffer[:int(entity_total.value)]
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@ensure_byte_strings()
def dcgmGetNvLinkLinkStatus(dcgm_handle):
    """
    Query link status through the native ``dcgmGetNvLinkLinkStatus`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.

    Returns:
        The ``c_dcgmNvLinkStatus_v3`` structure that was passed by
        reference to the native call, with its ``version`` preset to
        ``dcgm_structs.dcgmNvLinkStatus_version3``.
    """
    nvlink_status = dcgm_structs.c_dcgmNvLinkStatus_v3()
    nvlink_status.version = dcgm_structs.dcgmNvLinkStatus_version3
    status = dcgmFP("dcgmGetNvLinkLinkStatus")(dcgm_handle,
                                               byref(nvlink_status))
    dcgm_structs._dcgmCheckReturn(status)
    return nvlink_status
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
@ensure_byte_strings()
def dcgmGetGpuInstanceHierarchy(dcgm_handle):
    """
    Query the hierarchy through the native ``dcgmGetGpuInstanceHierarchy``.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.

    Returns:
        The ``c_dcgmMigHierarchy_v2`` structure that was passed by
        reference to the native call, with its ``version`` preset to
        ``dcgm_structs.c_dcgmMigHierarchy_version2``.
    """
    mig_hierarchy = dcgm_structs.c_dcgmMigHierarchy_v2()
    mig_hierarchy.version = dcgm_structs.c_dcgmMigHierarchy_version2
    status = dcgmFP("dcgmGetGpuInstanceHierarchy")(dcgm_handle,
                                                   byref(mig_hierarchy))
    dcgm_structs._dcgmCheckReturn(status)
    return mig_hierarchy
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@ensure_byte_strings()
def dcgmCreateMigEntity(dcgm_handle, parentId, profile, createOption, flags):
    """
    Submit a create request through the native ``dcgmCreateMigEntity`` call.

    The arguments are copied into a ``c_dcgmCreateMigEntity_v1`` structure
    that is passed by reference to the native function.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        parentId: Stored in the request's ``parentId`` field.
        profile: Stored in the request's ``profile`` field.
        createOption: Stored in the request's ``createOption`` field.
        flags: Stored in the request's ``flags`` field.

    Returns:
        None. The status code is only handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    native_call = dcgmFP("dcgmCreateMigEntity")
    request = dcgm_structs.c_dcgmCreateMigEntity_v1()
    field_values = (
        ("version", dcgm_structs.c_dcgmCreateMigEntity_version1),
        ("parentId", parentId),
        ("createOption", createOption),
        ("profile", profile),
        ("flags", flags),
    )
    for field_name, field_value in field_values:
        setattr(request, field_name, field_value)
    status = native_call(dcgm_handle, byref(request))
    dcgm_structs._dcgmCheckReturn(status)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
@ensure_byte_strings()
def dcgmDeleteMigEntity(dcgm_handle, entityGroupId, entityId, flags):
    """
    Submit a delete request through the native ``dcgmDeleteMigEntity`` call.

    The arguments are copied into a ``c_dcgmDeleteMigEntity_v1`` structure
    that is passed by reference to the native function.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        entityGroupId: Stored in the request's ``entityGroupId`` field.
        entityId: Stored in the request's ``entityId`` field.
        flags: Stored in the request's ``flags`` field.

    Returns:
        None. The status code is only handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    native_call = dcgmFP("dcgmDeleteMigEntity")
    request = dcgm_structs.c_dcgmDeleteMigEntity_v1()
    field_values = (
        ("version", dcgm_structs.c_dcgmDeleteMigEntity_version1),
        ("entityGroupId", entityGroupId),
        ("entityId", entityId),
        ("flags", flags),
    )
    for field_name, field_value in field_values:
        setattr(request, field_name, field_value)
    status = native_call(dcgm_handle, byref(request))
    dcgm_structs._dcgmCheckReturn(status)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@ensure_byte_strings()
def dcgmGroupCreate(dcgm_handle, type, groupName):
    """
    Create a group through the native ``dcgmGroupCreate`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        type: Group type value, forwarded unchanged.
        groupName: Group name. A ``str`` value is encoded to UTF-8 bytes by
            the ``ensure_byte_strings`` decorator before this body runs.

    Returns:
        The ``c_void_p`` that was passed by reference to the native call
        (the new group id), once the status has been checked with
        ``dcgm_structs._dcgmCheckReturn``.
    """
    new_group_id = c_void_p()
    status = dcgmFP("dcgmGroupCreate")(dcgm_handle, type, groupName,
                                       byref(new_group_id))
    dcgm_structs._dcgmCheckReturn(status)
    return new_group_id
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
@ensure_byte_strings()
def dcgmGroupDestroy(dcgm_handle, group_id):
    """
    Call the native ``dcgmGroupDestroy`` for the given group.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupDestroy")(dcgm_handle, group_id)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@ensure_byte_strings()
def dcgmGroupAddDevice(dcgm_handle, group_id, gpu_id):
    """
    Call the native ``dcgmGroupAddDevice`` for a group and GPU id.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        gpu_id: GPU id forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupAddDevice")(dcgm_handle, group_id, gpu_id)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
@ensure_byte_strings()
def dcgmGroupAddEntity(dcgm_handle, group_id, entityGroupId, entityId):
    """
    Call the native ``dcgmGroupAddEntity`` for a group and entity.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        entityGroupId: Entity group id forwarded unchanged.
        entityId: Entity id forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupAddEntity")(dcgm_handle, group_id,
                                          entityGroupId, entityId)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@ensure_byte_strings()
def dcgmGroupRemoveDevice(dcgm_handle, group_id, gpu_id):
    """
    Call the native ``dcgmGroupRemoveDevice`` for a group and GPU id.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        gpu_id: GPU id forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupRemoveDevice")(dcgm_handle, group_id, gpu_id)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
@ensure_byte_strings()
def dcgmGroupRemoveEntity(dcgm_handle, group_id, entityGroupId, entityId):
    """
    Call the native ``dcgmGroupRemoveEntity`` for a group and entity.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        entityGroupId: Entity group id forwarded unchanged.
        entityId: Entity id forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGroupRemoveEntity")(dcgm_handle, group_id,
                                             entityGroupId, entityId)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@ensure_byte_strings()
def dcgmGroupGetInfo(dcgm_handle,
                     group_id,
                     version=dcgm_structs.c_dcgmGroupInfo_version2):
    """
    Read a group's info through the native ``dcgmGroupGetInfo`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        version: Structure version to request. Only
            ``dcgm_structs.c_dcgmGroupInfo_version2`` is handled here.

    Returns:
        The ``c_dcgmGroupInfo_v2`` structure that was passed by reference
        to the native call.
    """
    native_call = dcgmFP("dcgmGroupGetInfo")

    # The version is selected explicitly so that the request matches what
    # the host engine supports.
    if version != dcgm_structs.c_dcgmGroupInfo_version2:
        # Any other version is reported as a mismatch; _dcgmCheckReturn is
        # presumably expected to raise here, since no structure exists
        # for the call below otherwise.
        dcgm_structs._dcgmCheckReturn(dcgm_structs.DCGM_ST_VER_MISMATCH)
    else:
        device_values = dcgm_structs.c_dcgmGroupInfo_v2()
        device_values.version = dcgm_structs.c_dcgmGroupInfo_version2

    status = native_call(dcgm_handle, group_id, byref(device_values))
    dcgm_structs._dcgmCheckReturn(status)
    return device_values
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@ensure_byte_strings()
def dcgmGroupGetAllIds(dcgmHandle):
    """
    Fetch group ids through the native ``dcgmGroupGetAllIds`` call.

    Args:
        dcgmHandle: Handle forwarded unchanged to the native call.

    Returns:
        A Python list holding the leading entries of the ``c_void_p`` id
        buffer, as many as the native call reported through its count
        out-parameter.
    """
    native_call = dcgmFP("dcgmGroupGetAllIds")
    reported = c_uint()
    # Fixed-size output buffer; only the reported prefix is returned.
    id_buffer = (c_void_p * dcgm_structs.DCGM_MAX_NUM_GROUPS)()
    status = native_call(dcgmHandle, id_buffer, byref(reported))
    dcgm_structs._dcgmCheckReturn(status)
    return list(id_buffer[:int(reported.value)])
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
@ensure_byte_strings()
def dcgmFieldGroupCreate(dcgm_handle, fieldIds, fieldGroupName):
    """
    Create a field group through the native ``dcgmFieldGroupCreate`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        fieldIds: Sized iterable of field ids; copied into a ``c_uint16``
            array of the same length.
        fieldGroupName: Field group name. A ``str`` value is encoded to
            UTF-8 bytes by the ``ensure_byte_strings`` decorator before this
            body runs.

    Returns:
        The ``c_void_p`` that was passed by reference to the native call
        (the new field group id), once the status has been checked with
        ``dcgm_structs._dcgmCheckReturn``.
    """
    new_field_group_id = c_void_p()
    id_count = len(fieldIds)
    native_count = c_int32(id_count)
    native_ids = (c_uint16 * id_count)(*fieldIds)
    status = dcgmFP("dcgmFieldGroupCreate")(dcgm_handle, native_count,
                                            byref(native_ids),
                                            fieldGroupName,
                                            byref(new_field_group_id))
    dcgm_structs._dcgmCheckReturn(status)
    return new_field_group_id
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
@ensure_byte_strings()
def dcgmFieldGroupDestroy(dcgm_handle, fieldGroupId):
    """
    Call the native ``dcgmFieldGroupDestroy`` for the given field group.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        fieldGroupId: Field group id forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmFieldGroupDestroy")(dcgm_handle, fieldGroupId)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
@ensure_byte_strings()
def dcgmFieldGroupGetInfo(dcgm_handle, fieldGroupId):
    """
    Read a field group's info through the native ``dcgmFieldGroupGetInfo``.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        fieldGroupId: Stored in the request structure's ``fieldGroupId``
            field before the call.

    Returns:
        The ``c_dcgmFieldGroupInfo_v1`` structure that was passed by
        reference to the native call.
    """
    group_info = dcgm_structs.c_dcgmFieldGroupInfo_v1()
    group_info.version = dcgm_structs.dcgmFieldGroupInfo_version1
    group_info.fieldGroupId = fieldGroupId
    status = dcgmFP("dcgmFieldGroupGetInfo")(dcgm_handle, byref(group_info))
    dcgm_structs._dcgmCheckReturn(status)
    return group_info
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
@ensure_byte_strings()
def dcgmFieldGroupGetAll(dcgm_handle):
    """
    Read all field groups through the native ``dcgmFieldGroupGetAll`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.

    Returns:
        The ``c_dcgmAllFieldGroup_v1`` structure that was passed by
        reference to the native call, with its ``version`` preset to
        ``dcgm_structs.dcgmAllFieldGroup_version1``.
    """
    all_groups = dcgm_structs.c_dcgmAllFieldGroup_v1()
    all_groups.version = dcgm_structs.dcgmAllFieldGroup_version1
    status = dcgmFP("dcgmFieldGroupGetAll")(dcgm_handle, byref(all_groups))
    dcgm_structs._dcgmCheckReturn(status)
    return all_groups
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
@ensure_byte_strings()
def dcgmStatusCreate():
    """
    Create a status handle through the native ``dcgmStatusCreate`` call.

    Returns:
        The ``c_void_p`` that was passed by reference to the native call
        (the status handle), once the status code has been checked with
        ``dcgm_structs._dcgmCheckReturn``.
    """
    new_status_handle = c_void_p()
    status = dcgmFP("dcgmStatusCreate")(byref(new_status_handle))
    dcgm_structs._dcgmCheckReturn(status)
    return new_status_handle
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
@ensure_byte_strings()
def dcgmStatusDestroy(status_handle):
    """
    Call the native ``dcgmStatusDestroy`` for the given status handle.

    Args:
        status_handle: Status handle forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmStatusDestroy")(status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
@ensure_byte_strings()
def dcgmStatusGetCount(status_handle):
    """
    Read a count through the native ``dcgmStatusGetCount`` call.

    Args:
        status_handle: Status handle forwarded unchanged.

    Returns:
        The plain Python value of the ``c_uint`` that was passed by
        reference to the native call.
    """
    reported = c_uint()
    status = dcgmFP("dcgmStatusGetCount")(status_handle, byref(reported))
    dcgm_structs._dcgmCheckReturn(status)
    return reported.value
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
@ensure_byte_strings()
def dcgmStatusPopError(status_handle):
    """
    Pop one error record through the native ``dcgmStatusPopError`` call.

    Unlike the other wrappers, the status code is not handed to
    ``dcgm_structs._dcgmCheckReturn``; a non-OK status is reported by
    returning ``None``.

    Args:
        status_handle: Status handle forwarded unchanged.

    Returns:
        The ``c_dcgmErrorInfo_v1`` structure passed by reference to the
        native call when it returned ``dcgm_structs.DCGM_ST_OK``, otherwise
        ``None``.
    """
    error_info = dcgm_structs.c_dcgmErrorInfo_v1()
    status = dcgmFP("dcgmStatusPopError")(status_handle, byref(error_info))
    if status != dcgm_structs.DCGM_ST_OK:
        return None
    return error_info
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
@ensure_byte_strings()
def dcgmStatusClear(status_handle):
    """
    Call the native ``dcgmStatusClear`` for the given status handle.

    Args:
        status_handle: Status handle forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmStatusClear")(status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
@ensure_byte_strings()
def dcgmConfigSet(dcgm_handle, group_id, configToSet, status_handle):
    """
    Apply a configuration through the native ``dcgmConfigSet`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        configToSet: Configuration structure passed by reference. Its
            ``version`` attribute is overwritten in place with
            ``dcgm_structs.dcgmDeviceConfig_version1``.
        status_handle: Status handle forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    native_call = dcgmFP("dcgmConfigSet")
    # The caller's structure is stamped in place before it is handed over.
    configToSet.version = dcgm_structs.dcgmDeviceConfig_version1
    status = native_call(dcgm_handle, group_id, byref(configToSet),
                         status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
@ensure_byte_strings()
def dcgmConfigGet(dcgm_handle, group_id, reqCfgType, count, status_handle):
    """
    Read configurations through the native ``dcgmConfigGet`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        reqCfgType: Requested configuration type, forwarded unchanged.
        count: Number of ``c_dcgmDeviceConfig_v1`` entries to allocate; also
            forwarded to the native call.
        status_handle: Status handle forwarded unchanged.

    Returns:
        A Python list with the first ``count`` entries of the configuration
        array that was passed to the native call.
    """
    native_call = dcgmFP("dcgmConfigGet")

    configs = (count * dcgm_structs.c_dcgmDeviceConfig_v1)()

    # Every entry gets its version stamped before the array is handed over.
    for entry in configs:
        entry.version = dcgm_structs.dcgmDeviceConfig_version1

    status = native_call(dcgm_handle, group_id, reqCfgType, count, configs,
                         status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return list(configs[:count])
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
@ensure_byte_strings()
def dcgmConfigEnforce(dcgm_handle, group_id, status_handle):
    """
    Call the native ``dcgmConfigEnforce`` for the given group.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        status_handle: Status handle forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmConfigEnforce")(dcgm_handle, group_id,
                                         status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
# This method is used to tell the cache manager to update all fields
|
|
425
|
+
@ensure_byte_strings()
def dcgmUpdateAllFields(dcgm_handle, waitForUpdate):
    """
    Ask the cache manager to update all fields via ``dcgmUpdateAllFields``.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        waitForUpdate: Wrapped in ``c_int`` before being passed on.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmUpdateAllFields")(dcgm_handle, c_int(waitForUpdate))
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
# This method is used to get the policy information
|
|
434
|
+
@ensure_byte_strings()
def dcgmPolicyGet(dcgm_handle, group_id, count, status_handle):
    """
    Read policy information through the native ``dcgmPolicyGet`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        count: Number of ``c_dcgmPolicy_v1`` entries to allocate; also
            forwarded to the native call.
        status_handle: Status handle forwarded unchanged.

    Returns:
        The first ``count`` entries of the policy array that was passed to
        the native call, as a Python list (a slice of the ctypes array).
    """
    native_call = dcgmFP("dcgmPolicyGet")

    policies = (count * dcgm_structs.c_dcgmPolicy_v1)()

    # Every entry gets its version stamped before the array is handed over.
    for entry in policies:
        entry.version = dcgm_structs.dcgmPolicy_version1

    status = native_call(dcgm_handle, group_id, count, policies,
                         status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return policies[:count]
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
# This method is used to set the policy information
|
|
450
|
+
@ensure_byte_strings()
def dcgmPolicySet(dcgm_handle, group_id, policy, status_handle):
    """
    Set policy information through the native ``dcgmPolicySet`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        group_id: Group id forwarded unchanged.
        policy: Policy structure, passed by reference.
        status_handle: Status handle forwarded unchanged.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmPolicySet")(dcgm_handle, group_id, byref(policy),
                                     status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
#First parameter below is the return type
|
|
459
|
+
# C callback prototype; presumably the type expected for ``enumCB`` in
# dcgmGetValuesSince / dcgmGetLatestValues (TODO confirm against callers).
dcgmFieldValueEnumeration_f = CFUNCTYPE(
    c_int32,  # return type
    c_uint32,
    POINTER(dcgm_structs.c_dcgmFieldValue_v1),
    c_int32,
    c_void_p,
)
# Same shape with one extra leading c_uint32 argument; presumably used by
# the ``_v2`` entity-based variants (TODO confirm against callers).
dcgmFieldValueEntityEnumeration_f = CFUNCTYPE(
    c_int32,  # return type
    c_uint32,
    c_uint32,
    POINTER(dcgm_structs.c_dcgmFieldValue_v1),
    c_int32,
    c_void_p,
)
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
@ensure_byte_strings()
def dcgmGetValuesSince(dcgm_handle, groupId, fieldGroupId, sinceTimestamp,
                       enumCB, userData):
    """
    Enumerate field values through the native ``dcgmGetValuesSince`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        groupId: Group id forwarded unchanged.
        fieldGroupId: Field group id forwarded unchanged.
        sinceTimestamp: Wrapped in ``c_int64`` before being passed on.
        enumCB: Callback object forwarded unchanged.
        userData: Arbitrary Python object; wrapped in ``py_object`` so the
            native side can hand it back to the callback.

    Returns:
        The plain Python value of the ``c_int64`` "next since" timestamp
        that was passed by reference to the native call.
    """
    native_call = dcgmFP("dcgmGetValuesSince")
    next_since = c_int64()
    status = native_call(dcgm_handle, groupId, fieldGroupId,
                         c_int64(sinceTimestamp), byref(next_since), enumCB,
                         py_object(userData))
    dcgm_structs._dcgmCheckReturn(status)
    return next_since.value
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
@ensure_byte_strings()
def dcgmGetValuesSince_v2(dcgm_handle, groupId, fieldGroupId, sinceTimestamp,
                          enumCB, userData):
    """
    Enumerate field values through the native ``dcgmGetValuesSince_v2``.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        groupId: Group id forwarded unchanged.
        fieldGroupId: Field group id forwarded unchanged.
        sinceTimestamp: Wrapped in ``c_int64`` before being passed on.
        enumCB: Callback object forwarded unchanged.
        userData: Arbitrary Python object; wrapped in ``py_object`` so the
            native side can hand it back to the callback.

    Returns:
        The plain Python value of the ``c_int64`` "next since" timestamp
        that was passed by reference to the native call.
    """
    native_call = dcgmFP("dcgmGetValuesSince_v2")
    next_since = c_int64()
    status = native_call(dcgm_handle, groupId, fieldGroupId,
                         c_int64(sinceTimestamp), byref(next_since), enumCB,
                         py_object(userData))
    dcgm_structs._dcgmCheckReturn(status)
    return next_since.value
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
@ensure_byte_strings()
def dcgmGetLatestValues(dcgm_handle, groupId, fieldGroupId, enumCB, userData):
    """
    Enumerate values through the native ``dcgmGetLatestValues`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        groupId: Group id forwarded unchanged.
        fieldGroupId: Field group id forwarded unchanged.
        enumCB: Callback object forwarded unchanged.
        userData: Arbitrary Python object; wrapped in ``py_object`` so the
            native side can hand it back to the callback.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGetLatestValues")(dcgm_handle, groupId,
                                           fieldGroupId, enumCB,
                                           py_object(userData))
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
@ensure_byte_strings()
def dcgmGetLatestValues_v2(dcgm_handle, groupId, fieldGroupId, enumCB,
                           userData):
    """
    Enumerate values through the native ``dcgmGetLatestValues_v2`` call.

    Args:
        dcgm_handle: Handle forwarded unchanged to the native call.
        groupId: Group id forwarded unchanged.
        fieldGroupId: Field group id forwarded unchanged.
        enumCB: Callback object forwarded unchanged.
        userData: Arbitrary Python object; wrapped in ``py_object`` so the
            native side can hand it back to the callback.

    Returns:
        The status code from the native call, after it has been handed to
        ``dcgm_structs._dcgmCheckReturn``.
    """
    status = dcgmFP("dcgmGetLatestValues_v2")(dcgm_handle, groupId,
                                              fieldGroupId, enumCB,
                                              py_object(userData))
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
@ensure_byte_strings()
def dcgmWatchFields(dcgm_handle, groupId, fieldGroupId, updateFreq, maxKeepAge,
                    maxKeepSamples):
    """Invoke the native ``dcgmWatchFields`` function obtained via dcgmFP.

    ``updateFreq`` is passed as ``c_int64``, ``maxKeepAge`` as ``c_double``
    and ``maxKeepSamples`` as ``c_int32``. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmWatchFields")
    status = native_call(
        dcgm_handle,
        groupId,
        fieldGroupId,
        c_int64(updateFreq),
        c_double(maxKeepAge),
        c_int32(maxKeepSamples),
    )
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
@ensure_byte_strings()
def dcgmUnwatchFields(dcgm_handle, groupId, fieldGroupId):
    """Invoke the native ``dcgmUnwatchFields`` function obtained via dcgmFP.

    The status code is checked with ``dcgm_structs._dcgmCheckReturn`` and
    then returned.
    """
    native_call = dcgmFP("dcgmUnwatchFields")
    status = native_call(dcgm_handle, groupId, fieldGroupId)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
@ensure_byte_strings()
def dcgmHealthSet(dcgm_handle, groupId, systems):
    """Invoke the native ``dcgmHealthSet`` function obtained via dcgmFP.

    ``groupId`` and ``systems`` are forwarded unchanged. The status code is
    checked with ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmHealthSet")
    status = native_call(dcgm_handle, groupId, systems)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
@ensure_byte_strings()
def dcgmHealthSet_v2(dcgm_handle, groupId, systems, updateInterval, maxKeepAge):
    """Invoke the native ``dcgmHealthSet_v2`` function obtained via dcgmFP.

    The arguments are packed into a ``c_dcgmHealthSetParams_v2`` structure
    (stamped with ``dcgmHealthSetParams_version2``) that is passed by
    reference. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    request = dcgm_structs.c_dcgmHealthSetParams_v2()
    request.version = dcgm_structs.dcgmHealthSetParams_version2
    request.groupId = groupId
    request.systems = systems
    request.updateInterval = updateInterval
    request.maxKeepAge = maxKeepAge

    native_call = dcgmFP("dcgmHealthSet_v2")
    status = native_call(dcgm_handle, byref(request))
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
@ensure_byte_strings()
def dcgmHealthGet(dcgm_handle, groupId):
    """Invoke the native ``dcgmHealthGet`` function obtained via dcgmFP.

    A ``c_int32`` is passed by reference as the output argument. The status
    code is checked with ``dcgm_structs._dcgmCheckReturn``.

    Returns the integer value the library wrote into the output argument.
    """
    systems_out = c_int32()
    native_call = dcgmFP("dcgmHealthGet")
    status = native_call(dcgm_handle, groupId, byref(systems_out))
    dcgm_structs._dcgmCheckReturn(status)
    return systems_out.value
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
@ensure_byte_strings()
def dcgmHealthCheck(dcgm_handle,
                    groupId,
                    version=dcgm_structs.dcgmHealthResponse_version4):
    """Invoke the native ``dcgmHealthCheck`` function obtained via dcgmFP.

    Only ``dcgmHealthResponse_version4`` is handled: for any other
    ``version`` the ``DCGM_ST_VER_MISMATCH`` status is handed to
    ``dcgm_structs._dcgmCheckReturn`` before anything else happens.

    Returns the ``c_dcgmHealthResponse_v4`` structure filled in by the
    library, after the call's status code has been checked with
    ``dcgm_structs._dcgmCheckReturn``.
    """
    supported_version = dcgm_structs.dcgmHealthResponse_version4
    if version != supported_version:
        dcgm_structs._dcgmCheckReturn(dcgm_structs.DCGM_ST_VER_MISMATCH)

    health_response = dcgm_structs.c_dcgmHealthResponse_v4()
    health_response.version = supported_version
    native_call = dcgmFP("dcgmHealthCheck")
    status = native_call(dcgm_handle, groupId, byref(health_response))
    dcgm_structs._dcgmCheckReturn(status)
    return health_response
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
@ensure_byte_strings()
def dcgmPolicyRegister(dcgm_handle, groupId, condition, beginCallback,
                       finishCallback):
    """Invoke the native ``dcgmPolicyRegister`` function obtained via dcgmFP.

    All arguments are forwarded unchanged. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmPolicyRegister")
    status = native_call(dcgm_handle, groupId, condition, beginCallback,
                         finishCallback)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
@ensure_byte_strings()
def dcgmPolicyUnregister(dcgm_handle, groupId, condition):
    """Invoke the native ``dcgmPolicyUnregister`` function obtained via dcgmFP.

    All arguments are forwarded unchanged. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmPolicyUnregister")
    status = native_call(dcgm_handle, groupId, condition)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
@ensure_byte_strings()
def dcgmPolicyTrigger(dcgm_handle):
    """Invoke the native ``dcgmPolicyTrigger`` function obtained via dcgmFP.

    The status code is checked with ``dcgm_structs._dcgmCheckReturn`` and
    then returned.
    """
    native_call = dcgmFP("dcgmPolicyTrigger")
    status = native_call(dcgm_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def helperDiagCheckReturn(ret, response):
    """Check a diagnostic call's status and return its response structure.

    ``ret`` is validated with ``dcgm_structs._dcgmCheckReturn``. If that
    raises ``dcgm_structs.DCGMError`` and ``response.systemError.msg`` is not
    the empty string, the message is attached to the exception through
    ``SetAdditionalInfo`` before the same exception is re-raised.

    Returns ``response`` unchanged when the status check does not raise.
    """
    try:
        dcgm_structs._dcgmCheckReturn(ret)
    except dcgm_structs.DCGMError as e:
        if response.systemError.msg != "":
            # Add systemError information to the raised exception.
            e.SetAdditionalInfo("%s" % response.systemError.msg)
        # A bare raise propagates the (possibly annotated) exception object
        # in both cases, so the two original raise branches are merged.
        raise

    return response
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
@ensure_byte_strings()
def dcgmActionValidate_v2(dcgm_handle,
                          runDiagInfo,
                          runDiagVersion=dcgm_structs.dcgmRunDiag_version7):
    """Invoke the native ``dcgmActionValidate_v2`` function obtained via dcgmFP.

    ``runDiagInfo.version`` is overwritten with ``runDiagVersion`` (the
    caller's structure is modified in place), and a fresh
    ``c_dcgmDiagResponse_v8`` stamped with ``dcgmDiagResponse_version8`` is
    passed by reference to receive the result.

    Returns the response structure via ``helperDiagCheckReturn``, which
    performs the status check.
    """
    diag_response = dcgm_structs.c_dcgmDiagResponse_v8()
    runDiagInfo.version = runDiagVersion
    diag_response.version = dcgm_structs.dcgmDiagResponse_version8

    native_call = dcgmFP("dcgmActionValidate_v2")
    status = native_call(dcgm_handle, byref(runDiagInfo), byref(diag_response))
    return helperDiagCheckReturn(status, diag_response)
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
@ensure_byte_strings()
def dcgmActionValidate(dcgm_handle, group_id, validate):
    """Run a validation through the native ``dcgmActionValidate_v2`` function.

    ``group_id`` and ``validate`` are packed into a ``c_dcgmRunDiag_v7``
    structure (stamped with ``dcgmRunDiag_version7``), and a fresh
    ``c_dcgmDiagResponse_v8`` stamped with ``dcgmDiagResponse_version8`` is
    passed by reference to receive the result.

    Returns the response structure via ``helperDiagCheckReturn``, which
    performs the status check.
    """
    diag_response = dcgm_structs.c_dcgmDiagResponse_v8()
    diag_response.version = dcgm_structs.dcgmDiagResponse_version8

    # Wrap the group id and validation selector in a dcgmRunDiag request.
    diag_request = dcgm_structs.c_dcgmRunDiag_v7()
    diag_request.version = dcgm_structs.dcgmRunDiag_version7
    diag_request.validate = validate
    diag_request.groupId = group_id

    native_call = dcgmFP("dcgmActionValidate_v2")
    status = native_call(dcgm_handle, byref(diag_request),
                         byref(diag_response))
    return helperDiagCheckReturn(status, diag_response)
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
@ensure_byte_strings()
def dcgmRunDiagnostic(dcgm_handle, group_id, diagLevel):
    """Invoke the native ``dcgmRunDiagnostic`` function obtained via dcgmFP.

    A fresh ``c_dcgmDiagResponse_v8`` stamped with
    ``dcgmDiagResponse_version8`` is passed by reference to receive the
    result.

    Returns the response structure via ``helperDiagCheckReturn``, which
    performs the status check.
    """
    diag_response = dcgm_structs.c_dcgmDiagResponse_v8()
    diag_response.version = dcgm_structs.dcgmDiagResponse_version8

    native_call = dcgmFP("dcgmRunDiagnostic")
    status = native_call(dcgm_handle, group_id, diagLevel,
                         byref(diag_response))
    return helperDiagCheckReturn(status, diag_response)
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
@ensure_byte_strings()
def dcgmWatchPidFields(dcgm_handle, groupId, updateFreq, maxKeepAge,
                       maxKeepSamples):
    """Invoke the native ``dcgmWatchPidFields`` function obtained via dcgmFP.

    ``updateFreq`` is passed as ``c_int64``, ``maxKeepAge`` as ``c_double``
    and ``maxKeepSamples`` as ``c_int32``. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmWatchPidFields")
    status = native_call(
        dcgm_handle,
        groupId,
        c_int64(updateFreq),
        c_double(maxKeepAge),
        c_int32(maxKeepSamples),
    )
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
@ensure_byte_strings()
def dcgmGetPidInfo(dcgm_handle, groupId, pid):
    """Invoke the native ``dcgmGetPidInfo`` function obtained via dcgmFP.

    A ``c_dcgmPidInfo_v2`` structure is stamped with ``dcgmPidInfo_version2``
    and ``pid``, then passed by reference. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    native_call = dcgmFP("dcgmGetPidInfo")

    pid_info = dcgm_structs.c_dcgmPidInfo_v2()
    pid_info.version = dcgm_structs.dcgmPidInfo_version2
    pid_info.pid = pid

    status = native_call(dcgm_handle, groupId, byref(pid_info))
    dcgm_structs._dcgmCheckReturn(status)
    return pid_info
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
@ensure_byte_strings()
def dcgmGetDeviceTopology(dcgm_handle, gpuId):
    """Invoke the native ``dcgmGetDeviceTopology`` function obtained via dcgmFP.

    A default-constructed ``c_dcgmDeviceTopology_v1`` is passed by reference
    (no version field is set here). The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    topology = dcgm_structs.c_dcgmDeviceTopology_v1()
    native_call = dcgmFP("dcgmGetDeviceTopology")
    status = native_call(dcgm_handle, gpuId, byref(topology))
    dcgm_structs._dcgmCheckReturn(status)
    return topology
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
@ensure_byte_strings()
def dcgmGetGroupTopology(dcgm_handle, groupId):
    """Invoke the native ``dcgmGetGroupTopology`` function obtained via dcgmFP.

    A default-constructed ``c_dcgmGroupTopology_v1`` is passed by reference
    (no version field is set here). The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    topology = dcgm_structs.c_dcgmGroupTopology_v1()
    native_call = dcgmFP("dcgmGetGroupTopology")
    status = native_call(dcgm_handle, groupId, byref(topology))
    dcgm_structs._dcgmCheckReturn(status)
    return topology
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
@ensure_byte_strings()
def dcgmWatchJobFields(dcgm_handle, groupId, updateFreq, maxKeepAge,
                       maxKeepSamples):
    """Invoke the native ``dcgmWatchJobFields`` function obtained via dcgmFP.

    ``updateFreq`` is passed as ``c_int64``, ``maxKeepAge`` as ``c_double``
    and ``maxKeepSamples`` as ``c_int32``. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmWatchJobFields")
    status = native_call(
        dcgm_handle,
        groupId,
        c_int64(updateFreq),
        c_double(maxKeepAge),
        c_int32(maxKeepSamples),
    )
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
@ensure_byte_strings()
def dcgmJobStartStats(dcgm_handle, groupId, jobid):
    """Invoke the native ``dcgmJobStartStats`` function obtained via dcgmFP.

    ``groupId`` and ``jobid`` are forwarded to the library. The status code
    is checked with ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmJobStartStats")
    status = native_call(dcgm_handle, groupId, jobid)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
@ensure_byte_strings()
def dcgmJobStopStats(dcgm_handle, jobid):
    """Invoke the native ``dcgmJobStopStats`` function obtained via dcgmFP.

    ``jobid`` is forwarded to the library. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmJobStopStats")
    status = native_call(dcgm_handle, jobid)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
@ensure_byte_strings()
def dcgmJobGetStats(dcgm_handle, jobid):
    """Invoke the native ``dcgmJobGetStats`` function obtained via dcgmFP.

    A ``c_dcgmJobInfo_v3`` structure stamped with ``dcgmJobInfo_version3`` is
    passed by reference. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    native_call = dcgmFP("dcgmJobGetStats")

    job_info = dcgm_structs.c_dcgmJobInfo_v3()
    job_info.version = dcgm_structs.dcgmJobInfo_version3

    status = native_call(dcgm_handle, jobid, byref(job_info))
    dcgm_structs._dcgmCheckReturn(status)
    return job_info
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
@ensure_byte_strings()
def dcgmJobRemove(dcgm_handle, jobid):
    """Invoke the native ``dcgmJobRemove`` function obtained via dcgmFP.

    ``jobid`` is forwarded to the library. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmJobRemove")
    status = native_call(dcgm_handle, jobid)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
@ensure_byte_strings()
def dcgmJobRemoveAll(dcgm_handle):
    """Invoke the native ``dcgmJobRemoveAll`` function obtained via dcgmFP.

    The status code is checked with ``dcgm_structs._dcgmCheckReturn`` and
    then returned.
    """
    native_call = dcgmFP("dcgmJobRemoveAll")
    status = native_call(dcgm_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
@ensure_byte_strings()
def dcgmIntrospectGetHostengineMemoryUsage(dcgm_handle, waitIfNoData=True):
    """Invoke the native ``dcgmIntrospectGetHostengineMemoryUsage`` function.

    A ``c_dcgmIntrospectMemory_v1`` structure stamped with
    ``dcgmIntrospectMemory_version1`` is passed by reference, followed by
    ``waitIfNoData``. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    native_call = dcgmFP("dcgmIntrospectGetHostengineMemoryUsage")

    memory_usage = dcgm_structs.c_dcgmIntrospectMemory_v1()
    memory_usage.version = dcgm_structs.dcgmIntrospectMemory_version1

    status = native_call(dcgm_handle, byref(memory_usage), waitIfNoData)
    dcgm_structs._dcgmCheckReturn(status)
    return memory_usage
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
@ensure_byte_strings()
def dcgmIntrospectGetHostengineCpuUtilization(dcgm_handle, waitIfNoData=True):
    """Invoke the native ``dcgmIntrospectGetHostengineCpuUtilization`` function.

    A ``c_dcgmIntrospectCpuUtil_v1`` structure stamped with
    ``dcgmIntrospectCpuUtil_version1`` is passed by reference, followed by
    ``waitIfNoData``. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    native_call = dcgmFP("dcgmIntrospectGetHostengineCpuUtilization")

    cpu_usage = dcgm_structs.c_dcgmIntrospectCpuUtil_v1()
    cpu_usage.version = dcgm_structs.dcgmIntrospectCpuUtil_version1

    status = native_call(dcgm_handle, byref(cpu_usage), waitIfNoData)
    dcgm_structs._dcgmCheckReturn(status)
    return cpu_usage
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
@ensure_byte_strings()
def dcgmEntityGetLatestValues(dcgmHandle, entityGroup, entityId, fieldIds):
    """Invoke the native ``dcgmEntityGetLatestValues`` function.

    ``fieldIds`` is copied into a ``c_uint16`` array, and an output array of
    ``c_dcgmFieldValue_v1`` with one slot per field id is allocated for the
    library to fill. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the ctypes array of ``c_dcgmFieldValue_v1``.
    """
    native_call = dcgmFP("dcgmEntityGetLatestValues")
    field_count = len(fieldIds)
    values_out = (dcgm_structs.c_dcgmFieldValue_v1 * field_count)()
    field_id_array = (c_uint16 * field_count)(*fieldIds)
    status = native_call(
        dcgmHandle,
        c_uint(entityGroup),
        dcgm_fields.c_dcgm_field_eid_t(entityId),
        field_id_array,
        c_uint(field_count),
        values_out,
    )
    dcgm_structs._dcgmCheckReturn(status)
    return values_out
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
@ensure_byte_strings()
def dcgmEntitiesGetLatestValues(dcgmHandle, entities, fieldIds, flags):
    """Invoke the native ``dcgmEntitiesGetLatestValues`` function.

    ``entities`` is copied into a ``c_dcgmGroupEntityPair_t`` array and
    ``fieldIds`` into a ``c_uint16`` array. The output array of
    ``c_dcgmFieldValue_v2`` has ``len(fieldIds) * len(entities)`` slots. The
    status code is checked with ``dcgm_structs._dcgmCheckReturn``.

    Returns the ctypes array of ``c_dcgmFieldValue_v2``.
    """
    native_call = dcgmFP("dcgmEntitiesGetLatestValues")
    field_count = len(fieldIds)
    entity_count = len(entities)
    values_out = (dcgm_structs.c_dcgmFieldValue_v2 *
                  (field_count * entity_count))()
    entity_array = (dcgm_structs.c_dcgmGroupEntityPair_t *
                    entity_count)(*entities)
    field_id_array = (c_uint16 * field_count)(*fieldIds)
    status = native_call(
        dcgmHandle,
        entity_array,
        c_uint(entity_count),
        field_id_array,
        c_uint(field_count),
        flags,
        values_out,
    )
    dcgm_structs._dcgmCheckReturn(status)
    return values_out
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
@ensure_byte_strings()
def dcgmSelectGpusByTopology(dcgmHandle, inputGpuIds, numGpus, hintFlags):
    """Invoke the native ``dcgmSelectGpusByTopology`` function.

    ``inputGpuIds`` and ``hintFlags`` are passed as ``c_uint64`` and
    ``numGpus`` as ``c_uint32``. A ``c_int64`` is passed by reference as the
    output argument. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the ``c_int64`` object itself, not its ``.value``.
    """
    native_call = dcgmFP("dcgmSelectGpusByTopology")
    selected_gpus = c_int64()
    status = native_call(
        dcgmHandle,
        c_uint64(inputGpuIds),
        c_uint32(numGpus),
        byref(selected_gpus),
        c_uint64(hintFlags),
    )
    dcgm_structs._dcgmCheckReturn(status)
    # Callers receive the ctypes wrapper and must read .value themselves.
    return selected_gpus
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
@ensure_byte_strings()
def dcgmGetFieldSummary(dcgmHandle, fieldId, entityGroupType, entityId,
                        summaryMask, startTime, endTime):
    """Invoke the native ``dcgmGetFieldSummary`` function obtained via dcgmFP.

    The arguments are packed into a ``c_dcgmFieldSummaryRequest_v1``
    (stamped with ``dcgmFieldSummaryRequest_version1``) that is passed by
    reference; ``summaryMask`` is stored in its ``summaryTypeMask`` field.
    The status code is checked with ``dcgm_structs._dcgmCheckReturn``.

    Returns the same request structure after the library has processed it.
    """
    native_call = dcgmFP("dcgmGetFieldSummary")

    summary_request = dcgm_structs.c_dcgmFieldSummaryRequest_v1()
    summary_request.version = dcgm_structs.dcgmFieldSummaryRequest_version1
    summary_request.fieldId = fieldId
    summary_request.entityGroupType = entityGroupType
    summary_request.entityId = entityId
    summary_request.summaryTypeMask = summaryMask
    summary_request.startTime = startTime
    summary_request.endTime = endTime

    status = native_call(dcgmHandle, byref(summary_request))
    dcgm_structs._dcgmCheckReturn(status)
    return summary_request
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
@ensure_byte_strings()
def dcgmModuleDenylist(dcgmHandle, moduleId):
    """Invoke the native ``dcgmModuleDenylist`` function obtained via dcgmFP.

    ``moduleId`` is passed as ``c_uint32``. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn`` and then returned.
    """
    native_call = dcgmFP("dcgmModuleDenylist")
    status = native_call(dcgmHandle, c_uint32(moduleId))
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
@ensure_byte_strings()
def dcgmModuleGetStatuses(dcgmHandle):
    """Invoke the native ``dcgmModuleGetStatuses`` function obtained via dcgmFP.

    A ``c_dcgmModuleGetStatuses_v1`` structure stamped with
    ``dcgmModuleGetStatuses_version1`` is passed by reference. The status
    code is checked with ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    statuses = dcgm_structs.c_dcgmModuleGetStatuses_v1()
    statuses.version = dcgm_structs.dcgmModuleGetStatuses_version1

    native_call = dcgmFP("dcgmModuleGetStatuses")
    status = native_call(dcgmHandle, byref(statuses))
    dcgm_structs._dcgmCheckReturn(status)
    return statuses
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
@ensure_byte_strings()
def dcgmProfGetSupportedMetricGroups(dcgmHandle, gpuId):
    """Invoke the native ``dcgmProfGetSupportedMetricGroups`` function.

    A ``c_dcgmProfGetMetricGroups_v3`` structure is stamped with
    ``dcgmProfGetMetricGroups_version3`` and ``gpuId``, then passed by
    reference. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    metric_groups = dcgm_structs.c_dcgmProfGetMetricGroups_v3()
    metric_groups.version = dcgm_structs.dcgmProfGetMetricGroups_version3
    metric_groups.gpuId = gpuId

    native_call = dcgmFP("dcgmProfGetSupportedMetricGroups")
    status = native_call(dcgmHandle, byref(metric_groups))
    dcgm_structs._dcgmCheckReturn(status)
    return metric_groups
|
|
852
|
+
|
|
853
|
+
|
|
854
|
+
@ensure_byte_strings()
def dcgmProfPause(dcgmHandle):
    """Invoke the native ``dcgmProfPause`` function obtained via dcgmFP.

    The status code is checked with ``dcgm_structs._dcgmCheckReturn`` and
    then returned.
    """
    native_call = dcgmFP("dcgmProfPause")
    status = native_call(dcgmHandle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
@ensure_byte_strings()
def dcgmProfResume(dcgmHandle):
    """Invoke the native ``dcgmProfResume`` function obtained via dcgmFP.

    The status code is checked with ``dcgm_structs._dcgmCheckReturn`` and
    then returned.
    """
    native_call = dcgmFP("dcgmProfResume")
    status = native_call(dcgmHandle)
    dcgm_structs._dcgmCheckReturn(status)
    return status
|
|
868
|
+
|
|
869
|
+
|
|
870
|
+
@ensure_byte_strings()
def dcgmVersionInfo():
    """Invoke the native ``dcgmVersionInfo`` function obtained via dcgmFP.

    A ``c_dcgmVersionInfo_v2`` structure stamped with
    ``dcgmVersionInfo_version2`` is passed by reference; no handle is
    involved. The status code is checked with
    ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    version_info = dcgm_structs.c_dcgmVersionInfo_v2()
    version_info.version = dcgm_structs.dcgmVersionInfo_version2

    native_call = dcgmFP("dcgmVersionInfo")
    status = native_call(byref(version_info))
    dcgm_structs._dcgmCheckReturn(status)
    return version_info
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
@ensure_byte_strings()
def dcgmHostengineIsHealthy(dcgmHandle):
    """Invoke the native ``dcgmHostengineIsHealthy`` function obtained via dcgmFP.

    A ``c_dcgmHostengineHealth_v1`` structure stamped with
    ``dcgmHostengineHealth_version1`` is passed by reference. The status
    code is checked with ``dcgm_structs._dcgmCheckReturn``.

    Returns the structure after the library has processed it.
    """
    engine_health = dcgm_structs.c_dcgmHostengineHealth_v1()
    engine_health.version = dcgm_structs.dcgmHostengineHealth_version1

    native_call = dcgmFP("dcgmHostengineIsHealthy")
    status = native_call(dcgmHandle, byref(engine_health))
    dcgm_structs._dcgmCheckReturn(status)
    return engine_health
|