triton-windows 3.2.0.post12__cp312-cp312-win_amd64.whl → 3.3.0a0.post12__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +3 -3
- triton/_internal_testing.py +59 -4
- triton/_utils.py +35 -0
- triton/backends/amd/compiler.py +121 -74
- triton/backends/amd/driver.py +77 -43
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +28 -49
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +35 -9
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +761 -284
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +9 -3
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +1391 -0
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +3 -3
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +44 -0
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +288 -0
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +110 -14
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +504 -103
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +2 -1
- triton/backends/amd/include/hip/amd_detail/host_defines.h +4 -0
- triton/backends/amd/include/hip/hip_ext.h +4 -2
- triton/backends/amd/include/hip/hip_fp8.h +33 -0
- triton/backends/amd/include/hip/hip_runtime_api.h +375 -33
- triton/backends/amd/include/hip/hip_version.h +3 -3
- triton/backends/amd/include/hip/hiprtc.h +25 -25
- triton/backends/amd/include/hsa/amd_hsa_elf.h +40 -14
- triton/backends/amd/include/hsa/hsa.h +11 -2
- triton/backends/amd/include/hsa/hsa_api_trace.h +30 -17
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +68 -0
- triton/backends/amd/include/hsa/hsa_ext_amd.h +83 -27
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +46 -46
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +416 -0
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +84 -4
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +260 -0
- triton/backends/amd/include/roctracer/hsa_prof_str.h +51 -19
- triton/backends/amd/lib/asanrtl.bc +0 -0
- triton/backends/compiler.py +25 -225
- triton/backends/driver.py +7 -2
- triton/backends/nvidia/bin/ptxas.exe +0 -0
- triton/backends/nvidia/compiler.py +135 -90
- triton/backends/nvidia/driver.c +0 -1
- triton/backends/nvidia/driver.py +135 -49
- triton/backends/nvidia/include/cuda.h +2162 -241
- triton/backends/nvidia/lib/x64/cuda.lib +0 -0
- triton/compiler/__init__.py +2 -2
- triton/compiler/code_generator.py +334 -231
- triton/compiler/compiler.py +77 -66
- triton/language/__init__.py +22 -5
- triton/language/core.py +448 -74
- triton/language/extra/cuda/_experimental_tma.py +3 -5
- triton/language/math.py +1 -1
- triton/language/random.py +2 -1
- triton/language/semantic.py +206 -52
- triton/language/standard.py +35 -18
- triton/runtime/_allocation.py +32 -0
- triton/runtime/autotuner.py +27 -32
- triton/runtime/build.py +1 -48
- triton/runtime/cache.py +6 -6
- triton/runtime/errors.py +10 -0
- triton/runtime/interpreter.py +179 -45
- triton/runtime/jit.py +149 -190
- triton/testing.py +39 -11
- triton/tools/compile.py +27 -20
- triton/tools/{compile.c → extra/cuda/compile.c} +1 -0
- triton/tools/mxfp.py +301 -0
- {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/METADATA +5 -2
- {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/RECORD +68 -59
- {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/top_level.txt +2 -0
- /triton/tools/{compile.h → extra/cuda/compile.h} +0 -0
- {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/WHEEL +0 -0
|
@@ -67,32 +67,32 @@ typedef enum hiprtcResult {
|
|
|
67
67
|
*/
|
|
68
68
|
|
|
69
69
|
typedef enum hiprtcJIT_option {
|
|
70
|
-
HIPRTC_JIT_MAX_REGISTERS = 0, ///< Maximum registers may be used in a thread, passed to compiler
|
|
71
|
-
HIPRTC_JIT_THREADS_PER_BLOCK, ///< Number of thread per block
|
|
72
|
-
HIPRTC_JIT_WALL_TIME, ///< Value for total wall clock time
|
|
73
|
-
HIPRTC_JIT_INFO_LOG_BUFFER, ///< Pointer to the buffer with logged information
|
|
74
|
-
HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES, ///< Size of the buffer in bytes for logged info
|
|
75
|
-
HIPRTC_JIT_ERROR_LOG_BUFFER, ///< Pointer to the buffer with logged error(s)
|
|
76
|
-
HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, ///< Size of the buffer in bytes for logged error(s)
|
|
77
|
-
HIPRTC_JIT_OPTIMIZATION_LEVEL, ///< Value of optimization level for generated codes
|
|
78
|
-
HIPRTC_JIT_TARGET_FROM_HIPCONTEXT, ///< The target context, which is the default
|
|
79
|
-
HIPRTC_JIT_TARGET, ///< JIT target
|
|
80
|
-
HIPRTC_JIT_FALLBACK_STRATEGY, ///< Fallback strategy
|
|
81
|
-
HIPRTC_JIT_GENERATE_DEBUG_INFO, ///< Generate debug information
|
|
82
|
-
HIPRTC_JIT_LOG_VERBOSE, ///< Generate log verbose
|
|
83
|
-
HIPRTC_JIT_GENERATE_LINE_INFO, ///< Generate line number information
|
|
84
|
-
HIPRTC_JIT_CACHE_MODE, ///< Set cache mode
|
|
85
|
-
HIPRTC_JIT_NEW_SM3X_OPT, ///< @deprecated
|
|
86
|
-
HIPRTC_JIT_FAST_COMPILE, ///< Set fast compile
|
|
87
|
-
HIPRTC_JIT_GLOBAL_SYMBOL_NAMES, ///< Array of device symbol names to be relocated to the host
|
|
88
|
-
HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS, ///< Array of host addresses to be relocated to the device
|
|
89
|
-
HIPRTC_JIT_GLOBAL_SYMBOL_COUNT, ///< Number of symbol count.
|
|
90
|
-
HIPRTC_JIT_LTO, ///< @deprecated
|
|
91
|
-
HIPRTC_JIT_FTZ, ///< @deprecated
|
|
92
|
-
HIPRTC_JIT_PREC_DIV, ///< @deprecated
|
|
70
|
+
HIPRTC_JIT_MAX_REGISTERS = 0, ///< CUDA Only Maximum registers may be used in a thread, passed to compiler
|
|
71
|
+
HIPRTC_JIT_THREADS_PER_BLOCK, ///< CUDA Only Number of thread per block
|
|
72
|
+
HIPRTC_JIT_WALL_TIME, ///< CUDA Only Value for total wall clock time
|
|
73
|
+
HIPRTC_JIT_INFO_LOG_BUFFER, ///< CUDA Only Pointer to the buffer with logged information
|
|
74
|
+
HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES, ///< CUDA Only Size of the buffer in bytes for logged info
|
|
75
|
+
HIPRTC_JIT_ERROR_LOG_BUFFER, ///< CUDA Only Pointer to the buffer with logged error(s)
|
|
76
|
+
HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, ///< CUDA Only Size of the buffer in bytes for logged error(s)
|
|
77
|
+
HIPRTC_JIT_OPTIMIZATION_LEVEL, ///< Value of optimization level for generated codes, acceptable options -O0, -O1, -O2, -O3
|
|
78
|
+
HIPRTC_JIT_TARGET_FROM_HIPCONTEXT, ///< CUDA Only The target context, which is the default
|
|
79
|
+
HIPRTC_JIT_TARGET, ///< CUDA Only JIT target
|
|
80
|
+
HIPRTC_JIT_FALLBACK_STRATEGY, ///< CUDA Only Fallback strategy
|
|
81
|
+
HIPRTC_JIT_GENERATE_DEBUG_INFO, ///< CUDA Only Generate debug information
|
|
82
|
+
HIPRTC_JIT_LOG_VERBOSE, ///< CUDA Only Generate log verbose
|
|
83
|
+
HIPRTC_JIT_GENERATE_LINE_INFO, ///< CUDA Only Generate line number information
|
|
84
|
+
HIPRTC_JIT_CACHE_MODE, ///< CUDA Only Set cache mode
|
|
85
|
+
HIPRTC_JIT_NEW_SM3X_OPT, ///< @deprecated CUDA Only New SM3X option.
|
|
86
|
+
HIPRTC_JIT_FAST_COMPILE, ///< CUDA Only Set fast compile
|
|
87
|
+
HIPRTC_JIT_GLOBAL_SYMBOL_NAMES, ///< CUDA Only Array of device symbol names to be relocated to the host
|
|
88
|
+
HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS, ///< CUDA Only Array of host addresses to be relocated to the device
|
|
89
|
+
HIPRTC_JIT_GLOBAL_SYMBOL_COUNT, ///< CUDA Only Number of symbol count.
|
|
90
|
+
HIPRTC_JIT_LTO, ///< @deprecated CUDA Only Enable link-time optimization for device code
|
|
91
|
+
HIPRTC_JIT_FTZ, ///< @deprecated CUDA Only Set single-precision denormals.
|
|
92
|
+
HIPRTC_JIT_PREC_DIV, ///< @deprecated CUDA Only Set single-precision floating-point division and
|
|
93
93
|
///< reciprocals
|
|
94
|
-
HIPRTC_JIT_PREC_SQRT, ///< @deprecated
|
|
95
|
-
HIPRTC_JIT_FMA, ///< @deprecated
|
|
94
|
+
HIPRTC_JIT_PREC_SQRT, ///< @deprecated CUDA Only Set single-precision floating-point square root
|
|
95
|
+
HIPRTC_JIT_FMA, ///< @deprecated CUDA Only Enable floating-point multiplies and adds/subtracts operations
|
|
96
96
|
HIPRTC_JIT_NUM_OPTIONS, ///< Number of options
|
|
97
97
|
HIPRTC_JIT_IR_TO_ISA_OPT_EXT = 10000, ///< Linker options to be passed on to compiler
|
|
98
98
|
/// @note Only supported for the AMD platform.
|
|
@@ -75,7 +75,8 @@ enum {
|
|
|
75
75
|
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
|
|
76
76
|
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
|
|
77
77
|
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
|
|
78
|
-
ELFABIVERSION_AMDGPU_HSA_V5 = 3
|
|
78
|
+
ELFABIVERSION_AMDGPU_HSA_V5 = 3,
|
|
79
|
+
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
|
|
79
80
|
};
|
|
80
81
|
|
|
81
82
|
// AMDGPU specific e_flags.
|
|
@@ -87,6 +88,7 @@ enum : unsigned {
|
|
|
87
88
|
EF_AMDGPU_MACH_NONE = 0x000,
|
|
88
89
|
|
|
89
90
|
// AMDGCN-based processors.
|
|
91
|
+
// clang-format off
|
|
90
92
|
EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
|
|
91
93
|
EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
|
|
92
94
|
EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
|
|
@@ -127,14 +129,25 @@ enum : unsigned {
|
|
|
127
129
|
EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
|
|
128
130
|
EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
|
|
129
131
|
EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
|
|
132
|
+
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
|
|
133
|
+
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
|
|
130
134
|
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
|
|
131
135
|
EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
|
|
132
136
|
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
|
|
137
|
+
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
|
|
138
|
+
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
|
|
133
139
|
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
|
|
140
|
+
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
|
|
141
|
+
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
|
|
142
|
+
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
|
|
143
|
+
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
|
|
144
|
+
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
|
|
145
|
+
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X55 = 0x055,
|
|
146
|
+
// clang-format on
|
|
134
147
|
|
|
135
148
|
// First/last AMDGCN-based processors.
|
|
136
149
|
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
|
|
137
|
-
EF_AMDGPU_MACH_AMDGCN_LAST =
|
|
150
|
+
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC,
|
|
138
151
|
|
|
139
152
|
// Indicates if the "xnack" target feature is enabled for all code contained
|
|
140
153
|
// in the object.
|
|
@@ -160,8 +173,7 @@ enum : unsigned {
|
|
|
160
173
|
|
|
161
174
|
// XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
|
|
162
175
|
//
|
|
163
|
-
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4
|
|
164
|
-
// ELFABIVERSION_AMDGPU_HSA_V5.
|
|
176
|
+
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
|
|
165
177
|
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
|
|
166
178
|
// XNACK is not supported.
|
|
167
179
|
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
|
|
@@ -174,8 +186,7 @@ enum : unsigned {
|
|
|
174
186
|
|
|
175
187
|
// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
|
|
176
188
|
//
|
|
177
|
-
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4
|
|
178
|
-
// ELFABIVERSION_AMDGPU_HSA_V5.
|
|
189
|
+
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
|
|
179
190
|
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
|
|
180
191
|
// SRAMECC is not supported.
|
|
181
192
|
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
|
|
@@ -185,6 +196,21 @@ enum : unsigned {
|
|
|
185
196
|
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
|
|
186
197
|
// SRAMECC is on.
|
|
187
198
|
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
|
|
199
|
+
|
|
200
|
+
// Generic target versioning. This is contained in the list byte of EFLAGS.
|
|
201
|
+
EF_AMDGPU_GENERIC_VERSION = 0xff000000,
|
|
202
|
+
EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
|
|
203
|
+
EF_AMDGPU_GENERIC_VERSION_MIN = 1,
|
|
204
|
+
EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
// ELF Relocation types for AMDGPU.
|
|
208
|
+
enum : unsigned {
|
|
209
|
+
R_AMDGPU_ABS32_LO = 1,
|
|
210
|
+
R_AMDGPU_ABS32_HI = 2,
|
|
211
|
+
R_AMDGPU_ABS64 = 3,
|
|
212
|
+
R_AMDGPU_ABS32 = 6,
|
|
213
|
+
R_AMDGPU_RELATIVE64 = 13,
|
|
188
214
|
};
|
|
189
215
|
|
|
190
216
|
} // end namespace ELF
|
|
@@ -246,14 +272,14 @@ typedef enum {
|
|
|
246
272
|
// ELF Symbol Flag Enumeration Values.
|
|
247
273
|
#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST
|
|
248
274
|
|
|
249
|
-
// AMD GPU Relocation Type Enumeration Values.
|
|
250
|
-
#define
|
|
251
|
-
#define
|
|
252
|
-
#define
|
|
253
|
-
#define
|
|
254
|
-
#define
|
|
255
|
-
#define
|
|
256
|
-
#define
|
|
275
|
+
// Legacy/V1 AMD GPU Relocation Type Enumeration Values.
|
|
276
|
+
#define R_AMDGPU_V1_NONE 0
|
|
277
|
+
#define R_AMDGPU_V1_32_LOW 1
|
|
278
|
+
#define R_AMDGPU_V1_32_HIGH 2
|
|
279
|
+
#define R_AMDGPU_V1_64 3
|
|
280
|
+
#define R_AMDGPU_V1_INIT_SAMPLER 4
|
|
281
|
+
#define R_AMDGPU_V1_INIT_IMAGE 5
|
|
282
|
+
#define R_AMDGPU_V1_RELATIVE64 13
|
|
257
283
|
|
|
258
284
|
// AMD GPU Note Type Enumeration Values.
|
|
259
285
|
#define NT_AMD_HSA_CODE_OBJECT_VERSION 1
|
|
@@ -598,10 +598,14 @@ typedef enum {
|
|
|
598
598
|
* AqlProfile extension.
|
|
599
599
|
*/
|
|
600
600
|
HSA_EXTENSION_AMD_AQLPROFILE = 0x202,
|
|
601
|
+
/**
|
|
602
|
+
* PC Sampling extension.
|
|
603
|
+
*/
|
|
604
|
+
HSA_EXTENSION_AMD_PC_SAMPLING = 0x203,
|
|
601
605
|
/**
|
|
602
606
|
* Last AMD extension.
|
|
603
607
|
*/
|
|
604
|
-
HSA_AMD_LAST_EXTENSION =
|
|
608
|
+
HSA_AMD_LAST_EXTENSION = 0x203
|
|
605
609
|
} hsa_extension_t;
|
|
606
610
|
|
|
607
611
|
/**
|
|
@@ -5656,7 +5660,12 @@ typedef enum {
|
|
|
5656
5660
|
* undefined if the symbol is not an indirect function. The type of this
|
|
5657
5661
|
* attribute is uint32_t.
|
|
5658
5662
|
*/
|
|
5659
|
-
HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
|
|
5663
|
+
HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16,
|
|
5664
|
+
/**
|
|
5665
|
+
* Wavefront size used by the kernel. The value of this attribute is either
|
|
5666
|
+
* 32 or 64. The type of this attribute is uint32_t.
|
|
5667
|
+
*/
|
|
5668
|
+
HSA_CODE_SYMBOL_INFO_KERNEL_WAVEFRONT_SIZE = 19
|
|
5660
5669
|
} hsa_code_symbol_info_t;
|
|
5661
5670
|
|
|
5662
5671
|
/**
|
|
@@ -44,39 +44,26 @@
|
|
|
44
44
|
#define HSA_RUNTIME_INC_HSA_API_TRACE_H
|
|
45
45
|
|
|
46
46
|
#include "hsa.h"
|
|
47
|
+
#include "hsa_api_trace_version.h"
|
|
47
48
|
#ifdef AMD_INTERNAL_BUILD
|
|
48
49
|
#include "hsa_ext_image.h"
|
|
49
50
|
#include "hsa_ext_amd.h"
|
|
50
51
|
#include "hsa_ext_finalize.h"
|
|
51
52
|
#include "hsa_amd_tool.h"
|
|
53
|
+
#include "hsa_ven_amd_pc_sampling.h"
|
|
52
54
|
#else
|
|
53
55
|
#include "inc/hsa_ext_image.h"
|
|
54
56
|
#include "inc/hsa_ext_amd.h"
|
|
55
57
|
#include "inc/hsa_ext_finalize.h"
|
|
56
58
|
#include "inc/hsa_amd_tool.h"
|
|
59
|
+
#include "inc/hsa_ven_amd_pc_sampling.h"
|
|
57
60
|
#endif
|
|
58
61
|
|
|
59
62
|
#include <string.h>
|
|
60
63
|
#include <assert.h>
|
|
61
64
|
#include <stddef.h>
|
|
62
65
|
|
|
63
|
-
//
|
|
64
|
-
#define HSA_API_TABLE_MAJOR_VERSION 0x03
|
|
65
|
-
#define HSA_CORE_API_TABLE_MAJOR_VERSION 0x02
|
|
66
|
-
#define HSA_AMD_EXT_API_TABLE_MAJOR_VERSION 0x02
|
|
67
|
-
#define HSA_FINALIZER_API_TABLE_MAJOR_VERSION 0x02
|
|
68
|
-
#define HSA_IMAGE_API_TABLE_MAJOR_VERSION 0x02
|
|
69
|
-
#define HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION 0x01
|
|
70
|
-
#define HSA_TOOLS_API_TABLE_MAJOR_VERSION 0x01
|
|
71
|
-
|
|
72
|
-
// Step Ids of the Api tables exported by Hsa Core Runtime
|
|
73
|
-
#define HSA_API_TABLE_STEP_VERSION 0x00
|
|
74
|
-
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
|
|
75
|
-
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x01
|
|
76
|
-
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
|
|
77
|
-
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x00
|
|
78
|
-
#define HSA_AQLPROFILE_API_TABLE_STEP_VERSION 0x00
|
|
79
|
-
#define HSA_TOOLS_API_TABLE_STEP_VERSION 0x00
|
|
66
|
+
// Table MAJOR_VERSION and STEP_VERSION defines have moved to hsa_api_trace_version.h
|
|
80
67
|
|
|
81
68
|
// Min function used to copy Api Tables
|
|
82
69
|
static inline uint32_t Min(const uint32_t a, const uint32_t b) {
|
|
@@ -191,6 +178,19 @@ struct ImageExtTable {
|
|
|
191
178
|
decltype(hsa_ext_image_create_with_layout)* hsa_ext_image_create_with_layout_fn;
|
|
192
179
|
};
|
|
193
180
|
|
|
181
|
+
// Table to export HSA PC Sampling Extension Apis
|
|
182
|
+
struct PcSamplingExtTable {
|
|
183
|
+
ApiTableVersion version;
|
|
184
|
+
decltype(hsa_ven_amd_pcs_iterate_configuration)* hsa_ven_amd_pcs_iterate_configuration_fn;
|
|
185
|
+
decltype(hsa_ven_amd_pcs_create)* hsa_ven_amd_pcs_create_fn;
|
|
186
|
+
decltype(hsa_ven_amd_pcs_create_from_id)* hsa_ven_amd_pcs_create_from_id_fn;
|
|
187
|
+
decltype(hsa_ven_amd_pcs_destroy)* hsa_ven_amd_pcs_destroy_fn;
|
|
188
|
+
decltype(hsa_ven_amd_pcs_start)* hsa_ven_amd_pcs_start_fn;
|
|
189
|
+
decltype(hsa_ven_amd_pcs_stop)* hsa_ven_amd_pcs_stop_fn;
|
|
190
|
+
decltype(hsa_ven_amd_pcs_flush)* hsa_ven_amd_pcs_flush_fn;
|
|
191
|
+
};
|
|
192
|
+
|
|
193
|
+
|
|
194
194
|
// Table to export AMD Extension Apis
|
|
195
195
|
struct AmdExtTable {
|
|
196
196
|
ApiTableVersion version;
|
|
@@ -263,6 +263,8 @@ struct AmdExtTable {
|
|
|
263
263
|
decltype(hsa_amd_vmem_get_alloc_properties_from_handle)*
|
|
264
264
|
hsa_amd_vmem_get_alloc_properties_from_handle_fn;
|
|
265
265
|
decltype(hsa_amd_agent_set_async_scratch_limit)* hsa_amd_agent_set_async_scratch_limit_fn;
|
|
266
|
+
decltype(hsa_amd_queue_get_info)* hsa_amd_queue_get_info_fn;
|
|
267
|
+
decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn;
|
|
266
268
|
};
|
|
267
269
|
|
|
268
270
|
// Table to export HSA Core Runtime Apis
|
|
@@ -464,6 +466,9 @@ struct HsaApiTable {
|
|
|
464
466
|
|
|
465
467
|
// Table of function pointers for tools to use
|
|
466
468
|
ToolsApiTable* tools_;
|
|
469
|
+
|
|
470
|
+
// Table of function pointers to AMD PC Sampling Extension
|
|
471
|
+
PcSamplingExtTable* pc_sampling_ext_;
|
|
467
472
|
};
|
|
468
473
|
|
|
469
474
|
// Structure containing instances of different api tables
|
|
@@ -474,6 +479,7 @@ struct HsaApiTableContainer {
|
|
|
474
479
|
FinalizerExtTable finalizer_ext;
|
|
475
480
|
ImageExtTable image_ext;
|
|
476
481
|
ToolsApiTable tools;
|
|
482
|
+
PcSamplingExtTable pc_sampling_ext;
|
|
477
483
|
|
|
478
484
|
// Default initialization of a container instance
|
|
479
485
|
HsaApiTableContainer() {
|
|
@@ -505,6 +511,11 @@ struct HsaApiTableContainer {
|
|
|
505
511
|
tools.version.minor_id = sizeof(ToolsApiTable);
|
|
506
512
|
tools.version.step_id = HSA_TOOLS_API_TABLE_STEP_VERSION;
|
|
507
513
|
root.tools_ = &tools;
|
|
514
|
+
|
|
515
|
+
pc_sampling_ext.version.major_id = HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION;
|
|
516
|
+
pc_sampling_ext.version.minor_id = sizeof(PcSamplingExtTable);
|
|
517
|
+
pc_sampling_ext.version.step_id = HSA_PC_SAMPLING_API_TABLE_STEP_VERSION;
|
|
518
|
+
root.pc_sampling_ext_ = &pc_sampling_ext;
|
|
508
519
|
}
|
|
509
520
|
};
|
|
510
521
|
|
|
@@ -562,5 +573,7 @@ static void inline copyTables(const HsaApiTable* src, HsaApiTable* dest) {
|
|
|
562
573
|
copyElement(&dest->image_ext_->version, &src->image_ext_->version);
|
|
563
574
|
if ((offsetof(HsaApiTable, tools_) < dest->version.minor_id))
|
|
564
575
|
copyElement(&dest->tools_->version, &src->tools_->version);
|
|
576
|
+
if ((offsetof(HsaApiTable, pc_sampling_ext_) < dest->version.minor_id))
|
|
577
|
+
copyElement(&dest->pc_sampling_ext_->version, &src->pc_sampling_ext_->version);
|
|
565
578
|
}
|
|
566
579
|
#endif
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
////////////////////////////////////////////////////////////////////////////////
|
|
2
|
+
//
|
|
3
|
+
// The University of Illinois/NCSA
|
|
4
|
+
// Open Source License (NCSA)
|
|
5
|
+
//
|
|
6
|
+
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
|
7
|
+
//
|
|
8
|
+
// Developed by:
|
|
9
|
+
//
|
|
10
|
+
// AMD Research and AMD HSA Software Development
|
|
11
|
+
//
|
|
12
|
+
// Advanced Micro Devices, Inc.
|
|
13
|
+
//
|
|
14
|
+
// www.amd.com
|
|
15
|
+
//
|
|
16
|
+
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
17
|
+
// of this software and associated documentation files (the "Software"), to
|
|
18
|
+
// deal with the Software without restriction, including without limitation
|
|
19
|
+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
20
|
+
// and/or sell copies of the Software, and to permit persons to whom the
|
|
21
|
+
// Software is furnished to do so, subject to the following conditions:
|
|
22
|
+
//
|
|
23
|
+
// - Redistributions of source code must retain the above copyright notice,
|
|
24
|
+
// this list of conditions and the following disclaimers.
|
|
25
|
+
// - Redistributions in binary form must reproduce the above copyright
|
|
26
|
+
// notice, this list of conditions and the following disclaimers in
|
|
27
|
+
// the documentation and/or other materials provided with the distribution.
|
|
28
|
+
// - Neither the names of Advanced Micro Devices, Inc,
|
|
29
|
+
// nor the names of its contributors may be used to endorse or promote
|
|
30
|
+
// products derived from this Software without specific prior written
|
|
31
|
+
// permission.
|
|
32
|
+
//
|
|
33
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
34
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
35
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
36
|
+
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
37
|
+
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
38
|
+
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
39
|
+
// DEALINGS WITH THE SOFTWARE.
|
|
40
|
+
//
|
|
41
|
+
////////////////////////////////////////////////////////////////////////////////
|
|
42
|
+
|
|
43
|
+
#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
|
|
44
|
+
#define HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
|
|
45
|
+
|
|
46
|
+
// CODE IN THIS FILE **MUST** BE C-COMPATIBLE
|
|
47
|
+
|
|
48
|
+
// Major Ids of the Api tables exported by Hsa Core Runtime
|
|
49
|
+
#define HSA_API_TABLE_MAJOR_VERSION 0x03
|
|
50
|
+
#define HSA_CORE_API_TABLE_MAJOR_VERSION 0x02
|
|
51
|
+
#define HSA_AMD_EXT_API_TABLE_MAJOR_VERSION 0x02
|
|
52
|
+
#define HSA_FINALIZER_API_TABLE_MAJOR_VERSION 0x02
|
|
53
|
+
#define HSA_IMAGE_API_TABLE_MAJOR_VERSION 0x02
|
|
54
|
+
#define HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION 0x01
|
|
55
|
+
#define HSA_TOOLS_API_TABLE_MAJOR_VERSION 0x01
|
|
56
|
+
#define HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION 0x01
|
|
57
|
+
|
|
58
|
+
// Step Ids of the Api tables exported by Hsa Core Runtime
|
|
59
|
+
#define HSA_API_TABLE_STEP_VERSION 0x01
|
|
60
|
+
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
|
|
61
|
+
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x03
|
|
62
|
+
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
|
|
63
|
+
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x00
|
|
64
|
+
#define HSA_AQLPROFILE_API_TABLE_STEP_VERSION 0x00
|
|
65
|
+
#define HSA_TOOLS_API_TABLE_STEP_VERSION 0x00
|
|
66
|
+
#define HSA_PC_SAMPLING_API_TABLE_STEP_VERSION 0x00
|
|
67
|
+
|
|
68
|
+
#endif // HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
|
|
@@ -47,16 +47,19 @@
|
|
|
47
47
|
|
|
48
48
|
#include "hsa.h"
|
|
49
49
|
#include "hsa_ext_image.h"
|
|
50
|
+
#include "hsa_ven_amd_pc_sampling.h"
|
|
50
51
|
|
|
51
|
-
|
|
52
|
+
/**
|
|
52
53
|
* - 1.0 - initial version
|
|
53
54
|
* - 1.1 - dmabuf export
|
|
54
55
|
* - 1.2 - hsa_amd_memory_async_copy_on_engine
|
|
55
56
|
* - 1.3 - HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED pool
|
|
56
57
|
* - 1.4 - Virtual Memory API
|
|
58
|
+
* - 1.5 - hsa_amd_agent_info: HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES
|
|
59
|
+
* - 1.6 - Virtual Memory API: hsa_amd_vmem_address_reserve_align
|
|
57
60
|
*/
|
|
58
61
|
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
|
|
59
|
-
#define HSA_AMD_INTERFACE_VERSION_MINOR
|
|
62
|
+
#define HSA_AMD_INTERFACE_VERSION_MINOR 6
|
|
60
63
|
|
|
61
64
|
#ifdef __cplusplus
|
|
62
65
|
extern "C" {
|
|
@@ -220,6 +223,11 @@ enum {
|
|
|
220
223
|
* Exceeded number of VGPRs available on this agent
|
|
221
224
|
*/
|
|
222
225
|
HSA_STATUS_ERROR_OUT_OF_REGISTERS = 45,
|
|
226
|
+
|
|
227
|
+
/**
|
|
228
|
+
* Resource is busy or temporarily unavailable
|
|
229
|
+
*/
|
|
230
|
+
HSA_STATUS_ERROR_RESOURCE_BUSY = 46,
|
|
223
231
|
};
|
|
224
232
|
|
|
225
233
|
/**
|
|
@@ -1175,7 +1183,11 @@ typedef enum hsa_amd_memory_pool_flag_s {
|
|
|
1175
1183
|
* connection. Atomic memory operations on these memory buffers are not
|
|
1176
1184
|
* guaranteed to be visible at system scope.
|
|
1177
1185
|
*/
|
|
1178
|
-
HSA_AMD_MEMORY_POOL_PCIE_FLAG = 1,
|
|
1186
|
+
HSA_AMD_MEMORY_POOL_PCIE_FLAG = (1 << 0),
|
|
1187
|
+
/**
|
|
1188
|
+
* Allocates physically contiguous memory
|
|
1189
|
+
*/
|
|
1190
|
+
HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG = (1 << 1),
|
|
1179
1191
|
|
|
1180
1192
|
} hsa_amd_memory_pool_flag_t;
|
|
1181
1193
|
|
|
@@ -2782,7 +2794,7 @@ hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* d
|
|
|
2782
2794
|
*/
|
|
2783
2795
|
hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf);
|
|
2784
2796
|
|
|
2785
|
-
|
|
2797
|
+
/**
|
|
2786
2798
|
* @brief Allocate a reserved address range
|
|
2787
2799
|
*
|
|
2788
2800
|
* Reserve a virtual address range. The size must be a multiple of the system page size.
|
|
@@ -2802,11 +2814,39 @@ hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf);
|
|
|
2802
2814
|
*
|
|
2803
2815
|
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Insufficient resources to allocate an address
|
|
2804
2816
|
* range of this size.
|
|
2817
|
+
*
|
|
2818
|
+
* Note that this API will be deprecated in a future release and replaced by
|
|
2819
|
+
* hsa_amd_vmem_address_reserve_align
|
|
2805
2820
|
*/
|
|
2806
2821
|
hsa_status_t hsa_amd_vmem_address_reserve(void** va, size_t size, uint64_t address,
|
|
2807
2822
|
uint64_t flags);
|
|
2808
2823
|
|
|
2809
|
-
|
|
2824
|
+
/**
|
|
2825
|
+
* @brief Allocate a reserved address range
|
|
2826
|
+
*
|
|
2827
|
+
* Reserve a virtual address range. The size must be a multiple of the system page size.
|
|
2828
|
+
* If it is not possible to allocate the address specified by @p address, then @p va will be
|
|
2829
|
+
* a different address range.
|
|
2830
|
+
* Address range should be released by calling hsa_amd_vmem_address_free.
|
|
2831
|
+
*
|
|
2832
|
+
* @param[out] va virtual address allocated
|
|
2833
|
+
* @param[in] size of address range requested
|
|
2834
|
+
* @param[in] address requested
|
|
2835
|
+
* @param[in] alignment requested. 0 for default. Must be >= page-size and a power of 2
|
|
2836
|
+
* @param[in] flags currently unsupported
|
|
2837
|
+
*
|
|
2838
|
+
* @retval ::HSA_STATUS_SUCCESS Address range allocated successfully
|
|
2839
|
+
*
|
|
2840
|
+
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
2841
|
+
* initialized.
|
|
2842
|
+
*
|
|
2843
|
+
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Insufficient resources to allocate an address
|
|
2844
|
+
* range of this size.
|
|
2845
|
+
*/
|
|
2846
|
+
hsa_status_t hsa_amd_vmem_address_reserve_align(void** va, size_t size, uint64_t address,
|
|
2847
|
+
uint64_t alignment, uint64_t flags);
|
|
2848
|
+
|
|
2849
|
+
/**
|
|
2810
2850
|
* @brief Free a reserved address range
|
|
2811
2851
|
*
|
|
2812
2852
|
* Free a previously allocated address range. The size must match the size of a previously
|
|
@@ -2840,7 +2880,7 @@ typedef enum {
|
|
|
2840
2880
|
MEMORY_TYPE_PINNED,
|
|
2841
2881
|
} hsa_amd_memory_type_t;
|
|
2842
2882
|
|
|
2843
|
-
|
|
2883
|
+
/**
|
|
2844
2884
|
* @brief Create a virtual memory handle
|
|
2845
2885
|
*
|
|
2846
2886
|
* Create a virtual memory handle within this pool
|
|
@@ -2869,7 +2909,7 @@ hsa_status_t hsa_amd_vmem_handle_create(hsa_amd_memory_pool_t pool, size_t size,
|
|
|
2869
2909
|
hsa_amd_memory_type_t type, uint64_t flags,
|
|
2870
2910
|
hsa_amd_vmem_alloc_handle_t* memory_handle);
|
|
2871
2911
|
|
|
2872
|
-
|
|
2912
|
+
/**
|
|
2873
2913
|
* @brief Release a virtual memory handle
|
|
2874
2914
|
*
|
|
2875
2915
|
* @param[in] memory handle that was previously allocated
|
|
@@ -2880,7 +2920,7 @@ hsa_status_t hsa_amd_vmem_handle_create(hsa_amd_memory_pool_t pool, size_t size,
|
|
|
2880
2920
|
*/
|
|
2881
2921
|
hsa_status_t hsa_amd_vmem_handle_release(hsa_amd_vmem_alloc_handle_t memory_handle);
|
|
2882
2922
|
|
|
2883
|
-
|
|
2923
|
+
/**
|
|
2884
2924
|
* @brief Map a virtual memory handle
|
|
2885
2925
|
*
|
|
2886
2926
|
* Map a virtual memory handle to a reserved address range. The virtual address requested must be
|
|
@@ -2906,7 +2946,7 @@ hsa_status_t hsa_amd_vmem_handle_release(hsa_amd_vmem_alloc_handle_t memory_hand
|
|
|
2906
2946
|
hsa_status_t hsa_amd_vmem_map(void* va, size_t size, size_t in_offset,
|
|
2907
2947
|
hsa_amd_vmem_alloc_handle_t memory_handle, uint64_t flags);
|
|
2908
2948
|
|
|
2909
|
-
|
|
2949
|
+
/**
|
|
2910
2950
|
* @brief Unmap a virtual memory handle
|
|
2911
2951
|
*
|
|
2912
2952
|
* Unmap previously mapped virtual address range
|
|
@@ -2929,7 +2969,7 @@ typedef struct hsa_amd_memory_access_desc_s {
|
|
|
2929
2969
|
hsa_agent_t agent_handle;
|
|
2930
2970
|
} hsa_amd_memory_access_desc_t;
|
|
2931
2971
|
|
|
2932
|
-
|
|
2972
|
+
/**
|
|
2933
2973
|
* @brief Make a memory mapping accessible
|
|
2934
2974
|
*
|
|
2935
2975
|
* Make previously mapped virtual address accessible to specific agents. @p size must be equal to
|
|
@@ -2958,7 +2998,7 @@ hsa_status_t hsa_amd_vmem_set_access(void* va, size_t size,
|
|
|
2958
2998
|
const hsa_amd_memory_access_desc_t* desc,
|
|
2959
2999
|
size_t desc_cnt);
|
|
2960
3000
|
|
|
2961
|
-
|
|
3001
|
+
/**
|
|
2962
3002
|
* @brief Get current access permissions for memory mapping
|
|
2963
3003
|
*
|
|
2964
3004
|
* Get access permissions for memory mapping for specific agent.
|
|
@@ -2979,7 +3019,7 @@ hsa_status_t hsa_amd_vmem_set_access(void* va, size_t size,
|
|
|
2979
3019
|
hsa_status_t hsa_amd_vmem_get_access(void* va, hsa_access_permission_t* perms,
|
|
2980
3020
|
hsa_agent_t agent_handle);
|
|
2981
3021
|
|
|
2982
|
-
|
|
3022
|
+
/**
|
|
2983
3023
|
* @brief Get an exportable shareable handle
|
|
2984
3024
|
*
|
|
2985
3025
|
* Get an exportable shareable handle for a memory_handle. This shareabl handle can then be used to
|
|
@@ -3002,7 +3042,7 @@ hsa_status_t hsa_amd_vmem_get_access(void* va, hsa_access_permission_t* perms,
|
|
|
3002
3042
|
hsa_status_t hsa_amd_vmem_export_shareable_handle(int* dmabuf_fd,
|
|
3003
3043
|
hsa_amd_vmem_alloc_handle_t handle,
|
|
3004
3044
|
uint64_t flags);
|
|
3005
|
-
|
|
3045
|
+
/**
|
|
3006
3046
|
* @brief Import a shareable handle
|
|
3007
3047
|
*
|
|
3008
3048
|
* Import a shareable handle for a memory handle. Importing a shareable handle that has been closed
|
|
@@ -3022,7 +3062,7 @@ hsa_status_t hsa_amd_vmem_export_shareable_handle(int* dmabuf_fd,
|
|
|
3022
3062
|
hsa_status_t hsa_amd_vmem_import_shareable_handle(int dmabuf_fd,
|
|
3023
3063
|
hsa_amd_vmem_alloc_handle_t* handle);
|
|
3024
3064
|
|
|
3025
|
-
|
|
3065
|
+
/**
|
|
3026
3066
|
* @brief Returns memory handle for mapped memory
|
|
3027
3067
|
*
|
|
3028
3068
|
* Return a memory handle for previously mapped memory. The handle will be the same value of handle
|
|
@@ -3039,19 +3079,19 @@ hsa_status_t hsa_amd_vmem_import_shareable_handle(int dmabuf_fd,
|
|
|
3039
3079
|
hsa_status_t hsa_amd_vmem_retain_alloc_handle(hsa_amd_vmem_alloc_handle_t* memory_handle,
|
|
3040
3080
|
void* addr);
|
|
3041
3081
|
|
|
3042
|
-
|
|
3043
|
-
* @brief Returns the current allocation properties of a handle
|
|
3044
|
-
*
|
|
3045
|
-
* Returns the allocation properties of an existing handle
|
|
3046
|
-
*
|
|
3047
|
-
* @param[in] memory_handle memory handle to be queried
|
|
3048
|
-
* @param[out] pool memory pool that owns this handle
|
|
3049
|
-
* @param[out] memory type
|
|
3050
|
-
|
|
3051
|
-
* @retval ::HSA_STATUS_SUCCESS
|
|
3052
|
-
*
|
|
3053
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION Invalid memory_handle
|
|
3054
|
-
*/
|
|
3082
|
+
/**
|
|
3083
|
+
* @brief Returns the current allocation properties of a handle
|
|
3084
|
+
*
|
|
3085
|
+
* Returns the allocation properties of an existing handle
|
|
3086
|
+
*
|
|
3087
|
+
* @param[in] memory_handle memory handle to be queried
|
|
3088
|
+
* @param[out] pool memory pool that owns this handle
|
|
3089
|
+
* @param[out] memory type
|
|
3090
|
+
|
|
3091
|
+
* @retval ::HSA_STATUS_SUCCESS
|
|
3092
|
+
*
|
|
3093
|
+
* @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION Invalid memory_handle
|
|
3094
|
+
*/
|
|
3055
3095
|
hsa_status_t hsa_amd_vmem_get_alloc_properties_from_handle(
|
|
3056
3096
|
hsa_amd_vmem_alloc_handle_t memory_handle, hsa_amd_memory_pool_t* pool,
|
|
3057
3097
|
hsa_amd_memory_type_t* type);
|
|
@@ -3083,6 +3123,22 @@ hsa_status_t hsa_amd_vmem_get_alloc_properties_from_handle(
|
|
|
3083
3123
|
*/
|
|
3084
3124
|
hsa_status_t HSA_API hsa_amd_agent_set_async_scratch_limit(hsa_agent_t agent, size_t threshold);
|
|
3085
3125
|
|
|
3126
|
+
typedef enum {
|
|
3127
|
+
/*
|
|
3128
|
+
* Returns the agent that owns the underlying HW queue.
|
|
3129
|
+
* The type of this attribute is hsa_agent_t.
|
|
3130
|
+
*/
|
|
3131
|
+
HSA_AMD_QUEUE_INFO_AGENT,
|
|
3132
|
+
/*
|
|
3133
|
+
* Returns the doorbell ID of the completion signal of the queue
|
|
3134
|
+
* The type of this attribute is uint64_t.
|
|
3135
|
+
*/
|
|
3136
|
+
HSA_AMD_QUEUE_INFO_DOORBELL_ID,
|
|
3137
|
+
} hsa_queue_info_attribute_t;
|
|
3138
|
+
|
|
3139
|
+
hsa_status_t hsa_amd_queue_get_info(hsa_queue_t* queue, hsa_queue_info_attribute_t attribute,
|
|
3140
|
+
void* value);
|
|
3141
|
+
|
|
3086
3142
|
#ifdef __cplusplus
|
|
3087
3143
|
} // end extern "C" block
|
|
3088
3144
|
#endif
|