triton-windows 3.3.1.post19__cp310-cp310-win_amd64.whl → 3.3.1.post21__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/backends/amd/driver.py +6 -1
- triton/backends/nvidia/compiler.py +1 -3
- triton/backends/nvidia/driver.py +7 -3
- triton/runtime/autotuner.py +2 -2
- triton/runtime/build.py +5 -5
- triton/windows_utils.py +11 -4
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/METADATA +1 -1
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/RECORD +11 -108
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/WHEEL +0 -0
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/top_level.txt +0 -0
|
@@ -1,416 +0,0 @@
|
|
|
1
|
-
////////////////////////////////////////////////////////////////////////////////
|
|
2
|
-
//
|
|
3
|
-
// The University of Illinois/NCSA
|
|
4
|
-
// Open Source License (NCSA)
|
|
5
|
-
//
|
|
6
|
-
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
|
|
7
|
-
//
|
|
8
|
-
// Developed by:
|
|
9
|
-
//
|
|
10
|
-
// AMD Research and AMD HSA Software Development
|
|
11
|
-
//
|
|
12
|
-
// Advanced Micro Devices, Inc.
|
|
13
|
-
//
|
|
14
|
-
// www.amd.com
|
|
15
|
-
//
|
|
16
|
-
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
17
|
-
// of this software and associated documentation files (the "Software"), to
|
|
18
|
-
// deal with the Software without restriction, including without limitation
|
|
19
|
-
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
20
|
-
// and/or sell copies of the Software, and to permit persons to whom the
|
|
21
|
-
// Software is furnished to do so, subject to the following conditions:
|
|
22
|
-
//
|
|
23
|
-
// - Redistributions of source code must retain the above copyright notice,
|
|
24
|
-
// this list of conditions and the following disclaimers.
|
|
25
|
-
// - Redistributions in binary form must reproduce the above copyright
|
|
26
|
-
// notice, this list of conditions and the following disclaimers in
|
|
27
|
-
// the documentation and/or other materials provided with the distribution.
|
|
28
|
-
// - Neither the names of Advanced Micro Devices, Inc,
|
|
29
|
-
// nor the names of its contributors may be used to endorse or promote
|
|
30
|
-
// products derived from this Software without specific prior written
|
|
31
|
-
// permission.
|
|
32
|
-
//
|
|
33
|
-
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
34
|
-
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
35
|
-
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
36
|
-
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
37
|
-
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
38
|
-
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
39
|
-
// DEALINGS WITH THE SOFTWARE.
|
|
40
|
-
//
|
|
41
|
-
////////////////////////////////////////////////////////////////////////////////
|
|
42
|
-
|
|
43
|
-
#ifndef HSA_VEN_AMD_PC_SAMPLING_H
|
|
44
|
-
#define HSA_VEN_AMD_PC_SAMPLING_H
|
|
45
|
-
|
|
46
|
-
#include "hsa.h"
|
|
47
|
-
|
|
48
|
-
#ifdef __cplusplus
|
|
49
|
-
extern "C" {
|
|
50
|
-
#endif /*__cplusplus*/
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
/**
|
|
54
|
-
* @brief HSA AMD Vendor PC Sampling APIs
|
|
55
|
-
* EXPERIMENTAL: All PC Sampling APIs are currently in an experimental phase and the APIs may be
|
|
56
|
-
* modified extensively in the future
|
|
57
|
-
*/
|
|
58
|
-
|
|
59
|
-
/**
|
|
60
|
-
* @brief PC Sampling sample data for hosttrap sampling method
|
|
61
|
-
*/
|
|
62
|
-
typedef struct {
|
|
63
|
-
uint64_t pc;
|
|
64
|
-
uint64_t exec_mask;
|
|
65
|
-
uint32_t workgroup_id_x;
|
|
66
|
-
uint32_t workgroup_id_y;
|
|
67
|
-
uint32_t workgroup_id_z;
|
|
68
|
-
uint32_t wave_in_wg : 6;
|
|
69
|
-
uint32_t chiplet : 3; // Currently not used
|
|
70
|
-
uint32_t reserved : 23;
|
|
71
|
-
uint32_t hw_id;
|
|
72
|
-
uint32_t reserved0;
|
|
73
|
-
uint64_t reserved1;
|
|
74
|
-
uint64_t timestamp;
|
|
75
|
-
uint64_t correlation_id;
|
|
76
|
-
} perf_sample_hosttrap_v1_t;
|
|
77
|
-
|
|
78
|
-
/**
|
|
79
|
-
* @brief PC Sampling sample data for stochastic sampling method
|
|
80
|
-
*/
|
|
81
|
-
typedef struct {
|
|
82
|
-
uint64_t pc;
|
|
83
|
-
uint64_t exec_mask;
|
|
84
|
-
uint32_t workgroup_id_x;
|
|
85
|
-
uint32_t workgroup_id_y;
|
|
86
|
-
uint32_t workgroup_id_z;
|
|
87
|
-
uint32_t wave_in_wg : 6;
|
|
88
|
-
uint32_t chiplet : 3; // Currently not used
|
|
89
|
-
uint32_t reserved : 23;
|
|
90
|
-
uint32_t hw_id;
|
|
91
|
-
uint32_t perf_snapshot_data;
|
|
92
|
-
uint32_t perf_snapshot_data1;
|
|
93
|
-
uint32_t perf_snapshot_data2;
|
|
94
|
-
uint64_t timestamp;
|
|
95
|
-
uint64_t correlation_id;
|
|
96
|
-
} perf_sample_snapshot_v1_t;
|
|
97
|
-
|
|
98
|
-
/**
|
|
99
|
-
* @brief PC Sampling method kinds
|
|
100
|
-
*/
|
|
101
|
-
typedef enum {
|
|
102
|
-
HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1,
|
|
103
|
-
HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1
|
|
104
|
-
} hsa_ven_amd_pcs_method_kind_t;
|
|
105
|
-
|
|
106
|
-
/**
|
|
107
|
-
* @brief PC Sampling interval unit type
|
|
108
|
-
*/
|
|
109
|
-
typedef enum {
|
|
110
|
-
HSA_VEN_AMD_PCS_INTERVAL_UNITS_MICRO_SECONDS,
|
|
111
|
-
HSA_VEN_AMD_PCS_INTERVAL_UNITS_CLOCK_CYCLES,
|
|
112
|
-
HSA_VEN_AMD_PCS_INTERVAL_UNITS_INSTRUCTIONS
|
|
113
|
-
} hsa_ven_amd_pcs_units_t;
|
|
114
|
-
|
|
115
|
-
/**
|
|
116
|
-
* @brief HSA callback function to perform the copy onto a destination buffer
|
|
117
|
-
*
|
|
118
|
-
* If data_size is 0, HSA will stop current copy operation and keep remaining data in internal
|
|
119
|
-
* buffers. Remaining contents of HSA internal buffers will be included in next
|
|
120
|
-
* hsa_ven_amd_pcs_data_ready_callback_t. HSA internal buffers can also be drained by calling
|
|
121
|
-
* hsa_ven_amd_pcs_flush.
|
|
122
|
-
*
|
|
123
|
-
* @param[in] hsa_callback_data private data to pass back to HSA. Provided in
|
|
124
|
-
* hsa_ven_amd_pcs_data_ready_callback_t
|
|
125
|
-
*
|
|
126
|
-
* @param[in] data_size size of destination buffer in bytes.
|
|
127
|
-
* @param[in] destination destination buffer
|
|
128
|
-
* @retval TBD: but could be used to indicate that there is no more data to be read.
|
|
129
|
-
* Or indicate an error and abort of current copy operations
|
|
130
|
-
*/
|
|
131
|
-
typedef hsa_status_t (*hsa_ven_amd_pcs_data_copy_callback_t)(void* hsa_callback_data,
|
|
132
|
-
size_t data_size, void* destination);
|
|
133
|
-
|
|
134
|
-
/**
|
|
135
|
-
* @brief HSA callback function to indicate that there is data ready to be copied
|
|
136
|
-
*
|
|
137
|
-
* When the client receives this callback, the client should call back @p data_copy_callback for HSA
|
|
138
|
-
* to perform the copy operation into an available buffer. @p data_copy_callback can be called back
|
|
139
|
-
* multiple times with smaller @p data_size to split the copy operation.
|
|
140
|
-
*
|
|
141
|
-
* This callback must not call ::hsa_ven_amd_pcs_flush.
|
|
142
|
-
*
|
|
143
|
-
* @param[in] client_callback_data client private data passed in via
|
|
144
|
-
* hsa_ven_amd_pcs_create/hsa_ven_amd_pcs_create_from_id
|
|
145
|
-
* @param[in] data_size size of data available to be copied
|
|
146
|
-
* @param[in] lost_sample_count number of lost samples since last call to
|
|
147
|
-
* hsa_ven_amd_pcs_data_ready_callback_t.
|
|
148
|
-
* @param[in] data_copy_callback callback function for HSA to perform the actual copy
|
|
149
|
-
* @param[in] hsa_callback_data private data to pass back to HSA
|
|
150
|
-
*/
|
|
151
|
-
typedef void (*hsa_ven_amd_pcs_data_ready_callback_t)(
|
|
152
|
-
void* client_callback_data, size_t data_size, size_t lost_sample_count,
|
|
153
|
-
hsa_ven_amd_pcs_data_copy_callback_t data_copy_callback, void* hsa_callback_data);
|
|
154
|
-
|
|
155
|
-
/**
|
|
156
|
-
* @brief Opaque handle representing a sampling session.
|
|
157
|
-
* Two sessions having same handle value represent the same session
|
|
158
|
-
*/
|
|
159
|
-
typedef struct {
|
|
160
|
-
uint64_t handle;
|
|
161
|
-
} hsa_ven_amd_pcs_t;
|
|
162
|
-
|
|
163
|
-
/**
|
|
164
|
-
* @brief PC Sampling configuration flag options
|
|
165
|
-
*/
|
|
166
|
-
typedef enum {
|
|
167
|
-
/* The interval for this sampling method has to be a power of 2 */
|
|
168
|
-
HSA_VEN_AMD_PCS_CONFIGURATION_FLAGS_INTERVAL_POWER_OF_2 = (1 << 0)
|
|
169
|
-
} hsa_ven_amd_pcs_configuration_flags_t;
|
|
170
|
-
|
|
171
|
-
/**
|
|
172
|
-
* @brief PC Sampling method information
|
|
173
|
-
* Used to provide client with list of supported PC Sampling methods
|
|
174
|
-
*/
|
|
175
|
-
typedef struct {
|
|
176
|
-
hsa_ven_amd_pcs_method_kind_t method;
|
|
177
|
-
hsa_ven_amd_pcs_units_t units;
|
|
178
|
-
size_t min_interval;
|
|
179
|
-
size_t max_interval;
|
|
180
|
-
uint64_t flags;
|
|
181
|
-
} hsa_ven_amd_pcs_configuration_t;
|
|
182
|
-
|
|
183
|
-
/**
|
|
184
|
-
* @brief Callback function to iterate through list of supported PC Sampling configurations
|
|
185
|
-
*
|
|
186
|
-
* @param[in] configuration one entry for supported PC Sampling method and configuration options
|
|
187
|
-
* @param[in] callback_data client private callback data that was passed in when calling
|
|
188
|
-
* hsa_ven_amd_pcs_iterate_configuration
|
|
189
|
-
*/
|
|
190
|
-
typedef hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration_callback_t)(
|
|
191
|
-
const hsa_ven_amd_pcs_configuration_t* configuration, void* callback_data);
|
|
192
|
-
|
|
193
|
-
/**
|
|
194
|
-
* @brief Iterate through list of current supported PC Sampling configurations for this @p agent
|
|
195
|
-
*
|
|
196
|
-
* HSA will callback @p configuration_callback for each currently available PC Sampling
|
|
197
|
-
* configuration. The list of currently available configurations may not be the complete list of
|
|
198
|
-
* configurations supported on the @p agent. The list of currently available configurations may be
|
|
199
|
-
* reduced if the @p agent is currently handling other PC sampling sessions.
|
|
200
|
-
*
|
|
201
|
-
* @param[in] agent target agent
|
|
202
|
-
* @param[in] configuration_callback callback function to iterate through list of configurations
|
|
203
|
-
* @param[in] callback_data client private callback data
|
|
204
|
-
**/
|
|
205
|
-
hsa_status_t hsa_ven_amd_pcs_iterate_configuration(
|
|
206
|
-
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
|
|
207
|
-
void* callback_data);
|
|
208
|
-
|
|
209
|
-
/**
|
|
210
|
-
* @brief Create a PC Sampling session on @p agent
|
|
211
|
-
*
|
|
212
|
-
* Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
|
|
213
|
-
* parameters must be a legal configuration value, as described by the
|
|
214
|
-
* hsa_ven_amd_pcs_configuration_t configurations passed to the callbacks of
|
|
215
|
-
* hsa_ven_amd_pcs_iterate_configuration for this @p agent.
|
|
216
|
-
* A successful call may restrict the list of possible PC sampling methods available to subsequent
|
|
217
|
-
* calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
|
|
218
|
-
* on what types of PC sampling they can perform concurrently.
|
|
219
|
-
* For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
|
|
220
|
-
* The session will be in a stopped/inactive state after this call
|
|
221
|
-
*
|
|
222
|
-
* @param[in] agent target agent
|
|
223
|
-
* @param[in] method method to use
|
|
224
|
-
* @param[in] units sampling units
|
|
225
|
-
* @param[in] interval sampling interval in @p units
|
|
226
|
-
* @param[in] latency expected latency in microseconds for client to provide a buffer for the data
|
|
227
|
-
* copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
|
|
228
|
-
* buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
|
|
229
|
-
* how many samples are received within @p latency and call @p data_ready_callback ahead of time so
|
|
230
|
-
* that the client has @p latency time to allocate the buffer before the HSA-runtime internal
|
|
231
|
-
* buffers are full. The value of latency can be 0.
|
|
232
|
-
* @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
|
|
233
|
-
* HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
|
|
234
|
-
* perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
|
|
235
|
-
* @param[in] data_ready_callback client callback function that will be called when:
|
|
236
|
-
* 1. There are enough samples to fill a buffer with @p buffer_size - estimated samples received
|
|
237
|
-
* within @p latency period.
|
|
238
|
-
* OR
|
|
239
|
-
* 2. When hsa_ven_amd_pcs_flush is called.
|
|
240
|
-
* @param[in] client_callback_data client private data to be provided back when data_ready_callback
|
|
241
|
-
* is called.
|
|
242
|
-
* @param[out] pc_sampling PC sampling session handle used to reference this session when calling
|
|
243
|
-
* hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
|
|
244
|
-
*
|
|
245
|
-
* @retval ::HSA_STATUS_SUCCESS session created successfully
|
|
246
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
|
|
247
|
-
* @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
|
|
248
|
-
* cannot handle the type requested.
|
|
249
|
-
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
|
|
250
|
-
* @retval ::HSA_STATUS_ERROR Unexpected error
|
|
251
|
-
**/
|
|
252
|
-
hsa_status_t hsa_ven_amd_pcs_create(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
|
253
|
-
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
|
|
254
|
-
size_t buffer_size,
|
|
255
|
-
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
|
|
256
|
-
void* client_callback_data, hsa_ven_amd_pcs_t* pc_sampling);
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
/**
|
|
260
|
-
* @brief Creates a PC Sampling session on @p agent. Assumes that the caller provides the
|
|
261
|
-
* @p pcs_id generated by the previous call to the underlying driver that reserved PC sampling
|
|
262
|
-
* on the @p agent.
|
|
263
|
-
*
|
|
264
|
-
* Similar to the @ref hsa_ven_amd_pcs_create with the difference that it inherits an existing
|
|
265
|
-
* PC sampling session that was previously created in the underlying driver.
|
|
266
|
-
*
|
|
267
|
-
* Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
|
|
268
|
-
* parameters must be a legal configuration value, and match the parameters that we used to create
|
|
269
|
-
* the underlying PC Sampling session in the underlying driver.
|
|
270
|
-
* A successful call may restrict the list of possible PC sampling methods available to subsequent
|
|
271
|
-
* calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
|
|
272
|
-
* on what types of PC sampling they can perform concurrently.
|
|
273
|
-
* For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
|
|
274
|
-
* The session will be in a stopped/inactive state after this call
|
|
275
|
-
*
|
|
276
|
-
* @param[in] pcs_id ID that uniquely identifies the PC sampling session within underlying driver
|
|
277
|
-
* @param[in] agent target agent
|
|
278
|
-
* @param[in] method method to use
|
|
279
|
-
* @param[in] units sampling units
|
|
280
|
-
* @param[in] interval sampling interval in @p units
|
|
281
|
-
* @param[in] latency expected latency in microseconds for client to provide a buffer for the data
|
|
282
|
-
* copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
|
|
283
|
-
* buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
|
|
284
|
-
* how many samples are received within @p latency and call @p data_ready_callback ahead of time so
|
|
285
|
-
* that the client has @p latency time to allocate the buffer before the HSA-runtime internal
|
|
286
|
-
* buffers are full. The value of latency can be 0.
|
|
287
|
-
* @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
|
|
288
|
-
* HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
|
|
289
|
-
* perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
|
|
290
|
-
* @param[in] data_ready_callback client callback function that will be called when:
|
|
291
|
-
* 1. There are enough samples to fill a buffer with @p buffer_size - estimated samples received
|
|
292
|
-
* within @p latency period.
|
|
293
|
-
* OR
|
|
294
|
-
* 2. When hsa_ven_amd_pcs_flush is called.
|
|
295
|
-
* @param[in] client_callback_data client private data to be provided back when data_ready_callback
|
|
296
|
-
* is called.
|
|
297
|
-
* @param[out] pc_sampling PC sampling session handle used to reference this session when calling
|
|
298
|
-
* hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
|
|
299
|
-
*
|
|
300
|
-
* @retval ::HSA_STATUS_SUCCESS session created successfully
|
|
301
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
|
|
302
|
-
* @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
|
|
303
|
-
* cannot handle the type requested.
|
|
304
|
-
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
|
|
305
|
-
* @retval ::HSA_STATUS_ERROR Unexpected error
|
|
306
|
-
**/
|
|
307
|
-
hsa_status_t hsa_ven_amd_pcs_create_from_id(
|
|
308
|
-
uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
|
309
|
-
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
|
|
310
|
-
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
|
|
311
|
-
hsa_ven_amd_pcs_t* pc_sampling);
|
|
312
|
-
|
|
313
|
-
/**
|
|
314
|
-
* @brief Free a PC Sampling session on @p agent
|
|
315
|
-
*
|
|
316
|
-
* Free all the resources allocated for a PC Sampling session on @p agent
|
|
317
|
-
* Internal buffers for this session will be lost.
|
|
318
|
-
* If the session was active, the session will be stopped before it is destroyed.
|
|
319
|
-
*
|
|
320
|
-
* @param[in] pc_sampling PC sampling session handle
|
|
321
|
-
*
|
|
322
|
-
* @retval ::HSA_STATUS_SUCCESS Session destroyed successfully
|
|
323
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
|
324
|
-
* @retval ::HSA_STATUS_ERROR unexpected error
|
|
325
|
-
*/
|
|
326
|
-
hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling);
|
|
327
|
-
|
|
328
|
-
/**
|
|
329
|
-
* @brief Start a PC Sampling session
|
|
330
|
-
*
|
|
331
|
-
* Activate a PC Sampling session that was previously created.
|
|
332
|
-
* The session will be in an active state after this call
|
|
333
|
-
* If the session was already active, this will result in a no-op and will return HSA_STATUS_SUCCESS
|
|
334
|
-
*
|
|
335
|
-
* @param[in] pc_sampling PC sampling session handle
|
|
336
|
-
*
|
|
337
|
-
* @retval ::HSA_STATUS_SUCCESS Session started successfully
|
|
338
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
|
339
|
-
* @retval ::HSA_STATUS_ERROR unexpected error
|
|
340
|
-
*/
|
|
341
|
-
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling);
|
|
342
|
-
|
|
343
|
-
/**
|
|
344
|
-
* @brief Stop a PC Sampling session
|
|
345
|
-
*
|
|
346
|
-
* Stop a session that is currently active
|
|
347
|
-
* After a session is stopped HSA may still have some PC Sampling data in its internal buffers.
|
|
348
|
-
* The internal buffers can be drained using hsa_ven_amd_pcs_flush. If the internal
|
|
349
|
-
* buffers are not drained and the session is started again, the internal buffers will be available
|
|
350
|
-
* on the next data_ready_callback.
|
|
351
|
-
* If the session was already inactive, this will result in a no-op and will return
|
|
352
|
-
* HSA_STATUS_SUCCESS
|
|
353
|
-
*
|
|
354
|
-
* @param[in] pc_sampling PC sampling session handle
|
|
355
|
-
*
|
|
356
|
-
* @retval ::HSA_STATUS_SUCCESS Session stopped successfully
|
|
357
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
|
358
|
-
*/
|
|
359
|
-
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling);
|
|
360
|
-
|
|
361
|
-
/**
|
|
362
|
-
* @brief Flush internal buffers for a PC Sampling session
|
|
363
|
-
*
|
|
364
|
-
* Drain internal buffers for a PC Sampling session. If internal buffers have available data,
|
|
365
|
-
* this triggers a data_ready_callback.
|
|
366
|
-
*
|
|
367
|
-
* The function blocks until all PC samples associated with the @p pc_sampling session
|
|
368
|
-
* generated prior to the function call have been communicated by invocations of
|
|
369
|
-
* @p data_ready_callback having completed execution.
|
|
370
|
-
*
|
|
371
|
-
* @param[in] pc_sampling PC sampling session handle
|
|
372
|
-
*
|
|
373
|
-
* @retval ::HSA_STATUS_SUCCESS Session flushed successfully
|
|
374
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
|
375
|
-
*/
|
|
376
|
-
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling);
|
|
377
|
-
|
|
378
|
-
#define hsa_ven_amd_pc_sampling_1_00
|
|
379
|
-
|
|
380
|
-
/**
|
|
381
|
-
* @brief The function pointer table for the PC Sampling v1.00 extension. Can be returned by
|
|
382
|
-
* ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
|
|
383
|
-
*/
|
|
384
|
-
typedef struct hsa_ven_amd_pc_sampling_1_00_pfn_t {
|
|
385
|
-
hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration)(
|
|
386
|
-
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
|
|
387
|
-
void* callback_data);
|
|
388
|
-
|
|
389
|
-
hsa_status_t (*hsa_ven_amd_pcs_create)(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
|
390
|
-
hsa_ven_amd_pcs_units_t units, size_t interval,
|
|
391
|
-
size_t latency, size_t buffer_size,
|
|
392
|
-
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
|
|
393
|
-
void* client_callback_data,
|
|
394
|
-
hsa_ven_amd_pcs_t* pc_sampling);
|
|
395
|
-
|
|
396
|
-
hsa_status_t (*hsa_ven_amd_pcs_create_from_id)(
|
|
397
|
-
uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
|
398
|
-
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
|
|
399
|
-
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
|
|
400
|
-
hsa_ven_amd_pcs_t* pc_sampling);
|
|
401
|
-
|
|
402
|
-
hsa_status_t (*hsa_ven_amd_pcs_destroy)(hsa_ven_amd_pcs_t pc_sampling);
|
|
403
|
-
|
|
404
|
-
hsa_status_t (*hsa_ven_amd_pcs_start)(hsa_ven_amd_pcs_t pc_sampling);
|
|
405
|
-
|
|
406
|
-
hsa_status_t (*hsa_ven_amd_pcs_stop)(hsa_ven_amd_pcs_t pc_sampling);
|
|
407
|
-
|
|
408
|
-
hsa_status_t (*hsa_ven_amd_pcs_flush)(hsa_ven_amd_pcs_t pc_sampling);
|
|
409
|
-
|
|
410
|
-
} hsa_ven_amd_pc_sampling_1_00_pfn_t;
|
|
411
|
-
|
|
412
|
-
#ifdef __cplusplus
|
|
413
|
-
} // end extern "C" block
|
|
414
|
-
#endif /*__cplusplus*/
|
|
415
|
-
|
|
416
|
-
#endif /* HSA_VEN_AMD_PC_SAMPLING_H */
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
|
2
|
-
|
|
3
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
-
in the Software without restriction, including without limitation the rights
|
|
6
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
-
furnished to do so, subject to the following conditions:
|
|
9
|
-
|
|
10
|
-
The above copyright notice and this permission notice shall be included in
|
|
11
|
-
all copies or substantial portions of the Software.
|
|
12
|
-
|
|
13
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
19
|
-
THE SOFTWARE. */
|
|
20
|
-
|
|
21
|
-
#ifndef EXT_PROF_PROTOCOL_H_
|
|
22
|
-
#define EXT_PROF_PROTOCOL_H_
|
|
23
|
-
|
|
24
|
-
#include <stdint.h>
|
|
25
|
-
#include <stdlib.h>
|
|
26
|
-
|
|
27
|
-
/* Traced API domains */
|
|
28
|
-
typedef enum {
|
|
29
|
-
ACTIVITY_DOMAIN_HSA_API = 0, /* HSA API domain */
|
|
30
|
-
ACTIVITY_DOMAIN_HSA_OPS = 1, /* HSA async activity domain */
|
|
31
|
-
ACTIVITY_DOMAIN_HIP_OPS = 2, /* HIP async activity domain */
|
|
32
|
-
ACTIVITY_DOMAIN_HCC_OPS =
|
|
33
|
-
ACTIVITY_DOMAIN_HIP_OPS, /* HCC async activity domain */
|
|
34
|
-
ACTIVITY_DOMAIN_HIP_VDI =
|
|
35
|
-
ACTIVITY_DOMAIN_HIP_OPS, /* HIP VDI async activity domain */
|
|
36
|
-
ACTIVITY_DOMAIN_HIP_API = 3, /* HIP API domain */
|
|
37
|
-
ACTIVITY_DOMAIN_KFD_API = 4, /* KFD API domain */
|
|
38
|
-
ACTIVITY_DOMAIN_EXT_API = 5, /* External ID domain */
|
|
39
|
-
ACTIVITY_DOMAIN_ROCTX = 6, /* ROCTX domain */
|
|
40
|
-
ACTIVITY_DOMAIN_HSA_EVT = 7, /* HSA events */
|
|
41
|
-
ACTIVITY_DOMAIN_NUMBER
|
|
42
|
-
} activity_domain_t;
|
|
43
|
-
|
|
44
|
-
/* API callback type */
|
|
45
|
-
typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid,
|
|
46
|
-
const void* data, void* arg);
|
|
47
|
-
typedef uint32_t activity_kind_t;
|
|
48
|
-
typedef uint32_t activity_op_t;
|
|
49
|
-
|
|
50
|
-
/* API callback phase */
|
|
51
|
-
typedef enum {
|
|
52
|
-
ACTIVITY_API_PHASE_ENTER = 0,
|
|
53
|
-
ACTIVITY_API_PHASE_EXIT = 1
|
|
54
|
-
} activity_api_phase_t;
|
|
55
|
-
|
|
56
|
-
/* Trace record types */
|
|
57
|
-
|
|
58
|
-
/* Correlation id */
|
|
59
|
-
typedef uint64_t activity_correlation_id_t;
|
|
60
|
-
|
|
61
|
-
/* Timestamp in nanoseconds */
|
|
62
|
-
typedef uint64_t roctracer_timestamp_t;
|
|
63
|
-
|
|
64
|
-
/* Activity record type */
|
|
65
|
-
typedef struct activity_record_s {
|
|
66
|
-
uint32_t domain; /* activity domain id */
|
|
67
|
-
activity_kind_t kind; /* activity kind */
|
|
68
|
-
activity_op_t op; /* activity op */
|
|
69
|
-
union {
|
|
70
|
-
struct {
|
|
71
|
-
activity_correlation_id_t correlation_id; /* activity ID */
|
|
72
|
-
roctracer_timestamp_t begin_ns; /* host begin timestamp */
|
|
73
|
-
roctracer_timestamp_t end_ns; /* host end timestamp */
|
|
74
|
-
};
|
|
75
|
-
struct {
|
|
76
|
-
uint32_t se; /* sampled SE */
|
|
77
|
-
uint64_t cycle; /* sample cycle */
|
|
78
|
-
uint64_t pc; /* sample PC */
|
|
79
|
-
} pc_sample;
|
|
80
|
-
};
|
|
81
|
-
union {
|
|
82
|
-
struct {
|
|
83
|
-
int device_id; /* device id */
|
|
84
|
-
uint64_t queue_id; /* queue id */
|
|
85
|
-
};
|
|
86
|
-
struct {
|
|
87
|
-
uint32_t process_id; /* process id */
|
|
88
|
-
uint32_t thread_id; /* thread id */
|
|
89
|
-
};
|
|
90
|
-
struct {
|
|
91
|
-
activity_correlation_id_t external_id; /* external correlation id */
|
|
92
|
-
};
|
|
93
|
-
};
|
|
94
|
-
union {
|
|
95
|
-
size_t bytes; /* data size bytes */
|
|
96
|
-
const char* kernel_name; /* kernel name */
|
|
97
|
-
const char* mark_message;
|
|
98
|
-
};
|
|
99
|
-
} activity_record_t;
|
|
100
|
-
|
|
101
|
-
/* Activity sync callback type */
|
|
102
|
-
typedef void (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data,
|
|
103
|
-
void* arg);
|
|
104
|
-
/* Activity async callback type */
|
|
105
|
-
typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg);
|
|
106
|
-
|
|
107
|
-
#endif /* EXT_PROF_PROTOCOL_H_ */
|