triton-windows 3.3.1.post19__cp311-cp311-win_amd64.whl → 3.3.1.post21__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (108) hide show
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/backends/amd/driver.py +6 -1
  3. triton/backends/nvidia/compiler.py +1 -3
  4. triton/backends/nvidia/driver.py +7 -3
  5. triton/runtime/autotuner.py +2 -2
  6. triton/runtime/build.py +5 -5
  7. triton/windows_utils.py +11 -4
  8. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/METADATA +1 -1
  9. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/RECORD +11 -108
  10. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  11. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
  12. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
  13. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
  14. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  15. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  16. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  17. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
  18. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  19. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
  20. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  21. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  22. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  23. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  24. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  25. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  26. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  27. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  28. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
  29. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
  30. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  31. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  32. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  33. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  34. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  35. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  36. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  37. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
  38. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  39. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  40. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  41. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  42. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  43. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
  44. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
  45. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
  46. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  47. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  48. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  49. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  50. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  51. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  52. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  53. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  54. triton/backends/amd/include/hip/device_functions.h +0 -38
  55. triton/backends/amd/include/hip/driver_types.h +0 -468
  56. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  57. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  58. triton/backends/amd/include/hip/hip_common.h +0 -100
  59. triton/backends/amd/include/hip/hip_complex.h +0 -38
  60. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  61. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  62. triton/backends/amd/include/hip/hip_ext.h +0 -161
  63. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  64. triton/backends/amd/include/hip/hip_fp8.h +0 -33
  65. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  66. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  67. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  68. triton/backends/amd/include/hip/hip_profile.h +0 -27
  69. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  70. triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
  71. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  72. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  73. triton/backends/amd/include/hip/hip_version.h +0 -17
  74. triton/backends/amd/include/hip/hiprtc.h +0 -421
  75. triton/backends/amd/include/hip/library_types.h +0 -78
  76. triton/backends/amd/include/hip/math_functions.h +0 -42
  77. triton/backends/amd/include/hip/surface_types.h +0 -63
  78. triton/backends/amd/include/hip/texture_types.h +0 -194
  79. triton/backends/amd/include/hsa/Brig.h +0 -1131
  80. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  81. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
  82. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  83. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  84. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  85. triton/backends/amd/include/hsa/hsa.h +0 -5738
  86. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  87. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
  88. triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
  89. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
  90. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  91. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  92. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  93. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  94. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
  95. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  96. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
  97. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
  98. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
  99. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  100. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  101. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  102. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  103. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  104. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  105. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  106. triton/backends/amd/include/roctracer/roctx.h +0 -229
  107. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/WHEEL +0 -0
  108. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/top_level.txt +0 -0
@@ -1,488 +0,0 @@
1
- ////////////////////////////////////////////////////////////////////////////////
2
- //
3
- // The University of Illinois/NCSA
4
- // Open Source License (NCSA)
5
- //
6
- // Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
7
- //
8
- // Developed by:
9
- //
10
- // AMD Research and AMD HSA Software Development
11
- //
12
- // Advanced Micro Devices, Inc.
13
- //
14
- // www.amd.com
15
- //
16
- // Permission is hereby granted, free of charge, to any person obtaining a copy
17
- // of this software and associated documentation files (the "Software"), to
18
- // deal with the Software without restriction, including without limitation
19
- // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20
- // and/or sell copies of the Software, and to permit persons to whom the
21
- // Software is furnished to do so, subject to the following conditions:
22
- //
23
- // - Redistributions of source code must retain the above copyright notice,
24
- // this list of conditions and the following disclaimers.
25
- // - Redistributions in binary form must reproduce the above copyright
26
- // notice, this list of conditions and the following disclaimers in
27
- // the documentation and/or other materials provided with the distribution.
28
- // - Neither the names of Advanced Micro Devices, Inc,
29
- // nor the names of its contributors may be used to endorse or promote
30
- // products derived from this Software without specific prior written
31
- // permission.
32
- //
33
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36
- // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37
- // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38
- // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39
- // DEALINGS WITH THE SOFTWARE.
40
- //
41
- ////////////////////////////////////////////////////////////////////////////////
42
-
43
- #ifndef OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
44
- #define OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
45
-
46
- #include <stdint.h>
47
- #include "hsa.h"
48
-
49
- #define HSA_AQLPROFILE_VERSION_MAJOR 2
50
- #define HSA_AQLPROFILE_VERSION_MINOR 0
51
-
52
- #ifdef __cplusplus
53
- extern "C" {
54
- #endif // __cplusplus
55
-
56
- ////////////////////////////////////////////////////////////////////////////////
57
- // Library version
58
- uint32_t hsa_ven_amd_aqlprofile_version_major();
59
- uint32_t hsa_ven_amd_aqlprofile_version_minor();
60
-
61
- ///////////////////////////////////////////////////////////////////////
62
- // Library API:
63
- // The library provides helper methods for instantiation of
64
- // the profile context object and for populating of the start
65
- // and stop AQL packets. The profile object contains a profiling
66
- // events list and the buffer descriptors needed for profiling:
67
- // a command buffer and an output data buffer. To check if there
68
- // was an error the library methods return a status code. Also
69
- // the library provides methods for querying required buffers
70
- // attributes, to validate the event attributes and to get profiling
71
- // output data.
72
- //
73
- // Returned status:
74
- // hsa_status_t – HSA status codes are used from hsa.h header
75
- //
76
- // Supported profiling features:
77
- //
78
- // Supported profiling events
79
- typedef enum {
80
- HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC = 0,
81
- HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE = 1,
82
- } hsa_ven_amd_aqlprofile_event_type_t;
83
-
84
- // Supported performance counters (PMC) blocks
85
- // The block ID is the same for a block instances set, for example
86
- // each block instance from the TCC block set, TCC0, TCC1, …, TCCN
87
- // will have the same block ID HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC.
88
- typedef enum {
89
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC = 0,
90
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF = 1,
91
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS = 2,
92
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM = 3,
93
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE = 4,
94
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI = 5,
95
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ = 6,
96
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS = 7,
97
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM = 8,
98
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX = 9,
99
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA = 10,
100
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA = 11,
101
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC = 12,
102
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP = 13,
103
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD = 14,
104
- // Memory related blocks
105
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB = 15,
106
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB = 16,
107
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM = 17,
108
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ = 18,
109
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2 = 19,
110
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR = 20,
111
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC = 21,
112
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2 = 22,
113
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA = 23,
114
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB = 24,
115
- // System blocks
116
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA = 25,
117
- // GFX10 added blocks
118
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A = 26,
119
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C = 27,
120
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A = 28,
121
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C = 29,
122
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR = 30,
123
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GUS = 31,
124
-
125
- // UMC & MMEA System Blocks
126
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC = 32,
127
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MMEA = 33,
128
-
129
- HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER
130
- } hsa_ven_amd_aqlprofile_block_name_t;
131
-
132
- // PMC event object structure
133
- // ‘counter_id’ is the counter select value specified in the GFXIP
134
- // perfcounter user guides, in the “Performance Counters Selection”
135
- // chapter.
136
- typedef struct {
137
- hsa_ven_amd_aqlprofile_block_name_t block_name;
138
- uint32_t block_index;
139
- uint32_t counter_id;
140
- } hsa_ven_amd_aqlprofile_event_t;
141
-
142
- // Check if event is valid for the specific GPU
143
- hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
144
- hsa_agent_t agent, // HSA handle for the profiling GPU
145
- const hsa_ven_amd_aqlprofile_event_t* event, // [in] Pointer to the event to validate
146
- bool* result); // [out] True if the event valid, False otherwise
147
-
148
- // Profiling parameters
149
- // All parameters are generic and if not applicable for a specific
150
- // profile configuration then error status will be returned.
151
- typedef enum {
152
- /**
153
- * Select the target compute unit (wgp) for profiling.
154
- */
155
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
156
- /**
157
- * VMID Mask
158
- */
159
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
160
- /**
161
- * Legacy. Deprecated.
162
- */
163
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
164
- /**
165
- * Legacy. Deprecated.
166
- */
167
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
168
- /**
169
- * Legacy. Deprecated.
170
- */
171
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
172
- /**
173
- * Shader engine mask for selection.
174
- */
175
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5,
176
- /**
177
- * Legacy. Deprecated.
178
- */
179
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE = 6,
180
- /**
181
- * Legacy. Deprecated.
182
- */
183
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT = 7,
184
- /**
185
- * Set SIMD Mask (GFX9) or SIMD ID for collection (Navi)
186
- */
187
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION = 8,
188
- /**
189
- * Set true for occupancy collection only.
190
- */
191
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE = 9,
192
- /**
193
- * ATT collection max data size, in MB. Shared among shader engines.
194
- */
195
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE = 10,
196
- /**
197
- * Mask of which compute units to generate perfcounters. GFX9 only.
198
- */
199
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK = 240,
200
- /**
201
- * Select collection period for perfcounters. GFX9 only.
202
- */
203
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL = 241,
204
- /**
205
- * Select perfcounter ID (SQ block) for collection. GFX9 only.
206
- */
207
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME = 242,
208
- } hsa_ven_amd_aqlprofile_parameter_name_t;
209
-
210
- // Profile parameter object
211
- typedef struct {
212
- hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
213
- uint32_t value;
214
- } hsa_ven_amd_aqlprofile_parameter_t;
215
-
216
- typedef enum {
217
- HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_0 = 0,
218
- HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_1,
219
- HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2,
220
- HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_3
221
- } hsa_ven_amd_aqlprofile_att_marker_channel_t;
222
-
223
- //
224
- // Profile context object:
225
- // The library provides a profile object structure which contains
226
- // the events array, a buffer for the profiling start/stop commands
227
- // and a buffer for the output data.
228
- // The buffers are specified by the buffer descriptors and allocated
229
- // by the application. The buffers allocation attributes, the command
230
- // buffer size, the PMC output buffer size as well as profiling output
231
- // data can be obtained using the generic get profile info helper _get_info.
232
- //
233
- // Buffer descriptor
234
- typedef struct {
235
- void* ptr;
236
- uint32_t size;
237
- } hsa_ven_amd_aqlprofile_descriptor_t;
238
-
239
- // Profile context object structure, contains profiling events list and
240
- // the buffer descriptors needed for profiling: a command buffer and
241
- // an output data buffer
242
- typedef struct {
243
- hsa_agent_t agent; // GFXIP handle
244
- hsa_ven_amd_aqlprofile_event_type_t type; // Events type
245
- const hsa_ven_amd_aqlprofile_event_t* events; // Events array
246
- uint32_t event_count; // Events count
247
- const hsa_ven_amd_aqlprofile_parameter_t* parameters; // Parameters array
248
- uint32_t parameter_count; // Parameters count
249
- hsa_ven_amd_aqlprofile_descriptor_t output_buffer; // Output buffer
250
- hsa_ven_amd_aqlprofile_descriptor_t command_buffer; // PM4 commands
251
- } hsa_ven_amd_aqlprofile_profile_t;
252
-
253
- //
254
- // AQL packets populating methods:
255
- // Helper methods that populate the application-provided START and
256
- // STOP AQL packets, which the application is required to submit before and
257
- // after the profiled GPU task packets, respectively.
258
- //
259
- // AQL Vendor Specific packet which carries a PM4 command
260
- typedef struct {
261
- uint16_t header;
262
- uint16_t pm4_command[27];
263
- hsa_signal_t completion_signal;
264
- } hsa_ext_amd_aql_pm4_packet_t;
265
-
266
- // Method to populate the provided AQL packet with profiling start commands
267
- // Only 'pm4_command' fields of the packet are set and the application
268
- // is responsible to set Vendor Specific header type and a completion signal
269
- hsa_status_t hsa_ven_amd_aqlprofile_start(
270
- hsa_ven_amd_aqlprofile_profile_t* profile, // [in/out] profile context object
271
- hsa_ext_amd_aql_pm4_packet_t* aql_start_packet); // [out] profile start AQL packet
272
-
273
- // Method to populate the provided AQL packet with profiling stop commands
274
- // Only 'pm4_command' fields of the packet are set and the application
275
- // is responsible to set Vendor Specific header type and a completion signal
276
- hsa_status_t hsa_ven_amd_aqlprofile_stop(
277
- const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
278
- hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet); // [out] profile stop AQL packet
279
-
280
- // Method to populate the provided AQL packet with profiling read commands
281
- // Only 'pm4_command' fields of the packet are set and the application
282
- // is responsible to set Vendor Specific header type and a completion signal
283
- hsa_status_t hsa_ven_amd_aqlprofile_read(
284
- const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
285
- hsa_ext_amd_aql_pm4_packet_t* aql_read_packet); // [out] profile read AQL packet
286
-
287
- // Legacy devices, PM4 profiling packet size
288
- const unsigned HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192;
289
- // Legacy devices, converting the profiling AQL packet to PM4 packet blob
290
- hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4(
291
- const hsa_ext_amd_aql_pm4_packet_t* aql_packet, // [in] AQL packet
292
- void* data); // [out] PM4 packet blob
293
-
294
- // Method to add a marker (correlation ID) into the ATT buffer.
295
- hsa_status_t hsa_ven_amd_aqlprofile_att_marker(
296
- hsa_ven_amd_aqlprofile_profile_t* profile, // [in/out] profile context object
297
- hsa_ext_amd_aql_pm4_packet_t* aql_marker_packet, // [out] profile marker AQL packet
298
- uint32_t data, // [in] Data to be inserted
299
- hsa_ven_amd_aqlprofile_att_marker_channel_t channel); // [in] Comm channel
300
-
301
- //
302
- // Get profile info:
303
- // Generic method for getting various profile info including profile buffers
304
- // attributes like the command buffer size and the profiling PMC results.
305
- // It’s implied that all counters are 64bit values.
306
- //
307
- // Profile generic output data:
308
- typedef struct {
309
- uint32_t sample_id; // PMC sample or trace buffer index
310
- union {
311
- struct {
312
- hsa_ven_amd_aqlprofile_event_t event; // PMC event
313
- uint64_t result; // PMC result
314
- } pmc_data;
315
- hsa_ven_amd_aqlprofile_descriptor_t trace_data; // Trace output data descriptor
316
- };
317
- } hsa_ven_amd_aqlprofile_info_data_t;
318
-
319
- // ID query type
320
- typedef struct {
321
- const char* name;
322
- uint32_t id;
323
- uint32_t instance_count;
324
- } hsa_ven_amd_aqlprofile_id_query_t;
325
-
326
- // Profile attributes
327
- typedef enum {
328
- HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0, // get_info returns uint32_t value
329
- HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1, // get_info returns uint32_t value
330
- HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2, // get_info returns PMC uint64_t value
331
- // in info_data object
332
- HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA = 3, // get_info returns trace buffer ptr/size
333
- // in info_data object
334
- HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4, // get_info returns number of block counter
335
- HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5, // get_info returns block id, instances
336
- // by name string using _id_query_t
337
- HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6, // get_info returns size/pointer for
338
- // counters enable command buffer
339
- HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD = 7, // get_info returns size/pointer for
340
- // counters disable command buffer
341
- } hsa_ven_amd_aqlprofile_info_type_t;
342
-
343
-
344
- // Definition of output data iterator callback
345
- typedef hsa_status_t (*hsa_ven_amd_aqlprofile_data_callback_t)(
346
- hsa_ven_amd_aqlprofile_info_type_t info_type, // [in] data type, PMC or trace data
347
- hsa_ven_amd_aqlprofile_info_data_t* info_data, // [in] info_data object
348
- void* callback_data); // [in/out] data passed to the callback
349
-
350
- // Method for getting the profile info
351
- hsa_status_t hsa_ven_amd_aqlprofile_get_info(
352
- const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
353
- hsa_ven_amd_aqlprofile_info_type_t attribute, // [in] requested profile attribute
354
- void* value); // [in/out] returned value
355
-
356
- // Method for iterating the events output data
357
- hsa_status_t hsa_ven_amd_aqlprofile_iterate_data(
358
- const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
359
- hsa_ven_amd_aqlprofile_data_callback_t callback, // [in] callback to iterate the output data
360
- void* data); // [in/out] data passed to the callback
361
-
362
- // Return error string
363
- hsa_status_t hsa_ven_amd_aqlprofile_error_string(
364
- const char** str); // [out] pointer to the error string
365
-
366
- /**
367
- * @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
368
- */
369
- typedef hsa_status_t(*hsa_ven_amd_aqlprofile_eventname_callback_t)(int id, const char* name);
370
- /**
371
- * @brief Iterate over all possible event coordinate IDs and their names.
372
- */
373
- hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_ids(hsa_ven_amd_aqlprofile_eventname_callback_t);
374
-
375
- /**
376
- * @brief Callback invoked for each event coordinate by _iterate_event_coord.
377
- * @param position A counting sequence indicating callback number.
378
- * @param id Coordinate ID as in _iterate_event_ids.
379
- * @param extent Coordinate extent indicating maximum allowed instances.
380
- * @param coordinate The coordinate, in the range [0,extent-1].
381
- * @param name Coordinate name as in _iterate_event_ids.
382
- * @param userdata Userdata returned from _iterate_event_coord function.
383
- */
384
- typedef hsa_status_t(*hsa_ven_amd_aqlprofile_coordinate_callback_t)(
385
- int position,
386
- int id,
387
- int extent,
388
- int coordinate,
389
- const char* name,
390
- void* userdata
391
- );
392
-
393
- /**
394
- * @brief Iterate over all event coordinates for a given agent_t and event_t.
395
- * @param[in] agent HSA agent.
396
- * @param[in] event The event ID and block ID to iterate for.
397
- * @param[in] sample_id aqlprofile_info_data_t.sample_id returned from _aqlprofile_iterate_data.
398
- * @param[in] callback Callback function to return the coordinates.
399
- * @param[in] userdata Arbitrary data pointer to be sent back to the user via callback.
400
- */
401
- hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_coord(
402
- hsa_agent_t agent,
403
- hsa_ven_amd_aqlprofile_event_t event,
404
- uint32_t sample_id,
405
- hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
406
- void* userdata
407
- );
408
-
409
- /**
410
- * @brief Extension version.
411
- */
412
- #define hsa_ven_amd_aqlprofile_VERSION_MAJOR 1
413
- #define hsa_ven_amd_aqlprofile_LIB(suff) "libhsa-amd-aqlprofile" suff ".so"
414
-
415
- #ifdef HSA_LARGE_MODEL
416
- static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("64");
417
- #else
418
- static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("");
419
- #endif
420
-
421
- /**
422
- * @brief Extension function table.
423
- */
424
- typedef struct hsa_ven_amd_aqlprofile_1_00_pfn_s {
425
- uint32_t (*hsa_ven_amd_aqlprofile_version_major)();
426
- uint32_t (*hsa_ven_amd_aqlprofile_version_minor)();
427
-
428
- hsa_status_t (*hsa_ven_amd_aqlprofile_error_string)(
429
- const char** str);
430
-
431
- hsa_status_t (*hsa_ven_amd_aqlprofile_validate_event)(
432
- hsa_agent_t agent,
433
- const hsa_ven_amd_aqlprofile_event_t* event,
434
- bool* result);
435
-
436
- hsa_status_t (*hsa_ven_amd_aqlprofile_start)(
437
- hsa_ven_amd_aqlprofile_profile_t* profile,
438
- hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);
439
-
440
- hsa_status_t (*hsa_ven_amd_aqlprofile_stop)(
441
- const hsa_ven_amd_aqlprofile_profile_t* profile,
442
- hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);
443
-
444
- hsa_status_t (*hsa_ven_amd_aqlprofile_read)(
445
- const hsa_ven_amd_aqlprofile_profile_t* profile,
446
- hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);
447
-
448
- hsa_status_t (*hsa_ven_amd_aqlprofile_legacy_get_pm4)(
449
- const hsa_ext_amd_aql_pm4_packet_t* aql_packet,
450
- void* data);
451
-
452
- hsa_status_t (*hsa_ven_amd_aqlprofile_get_info)(
453
- const hsa_ven_amd_aqlprofile_profile_t* profile,
454
- hsa_ven_amd_aqlprofile_info_type_t attribute,
455
- void* value);
456
-
457
- hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_data)(
458
- const hsa_ven_amd_aqlprofile_profile_t* profile,
459
- hsa_ven_amd_aqlprofile_data_callback_t callback,
460
- void* data);
461
-
462
- hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_ids)(
463
- hsa_ven_amd_aqlprofile_eventname_callback_t
464
- );
465
-
466
- hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_coord)(
467
- hsa_agent_t agent,
468
- hsa_ven_amd_aqlprofile_event_t event,
469
- uint32_t sample_id,
470
- hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
471
- void* userdata
472
- );
473
-
474
- hsa_status_t (*hsa_ven_amd_aqlprofile_att_marker)(
475
- hsa_ven_amd_aqlprofile_profile_t* profile,
476
- hsa_ext_amd_aql_pm4_packet_t* aql_packet,
477
- uint32_t data,
478
- hsa_ven_amd_aqlprofile_att_marker_channel_t channel
479
- );
480
- } hsa_ven_amd_aqlprofile_1_00_pfn_t;
481
-
482
- typedef hsa_ven_amd_aqlprofile_1_00_pfn_t hsa_ven_amd_aqlprofile_pfn_t;
483
-
484
- #ifdef __cplusplus
485
- }
486
- #endif // __cplusplus
487
-
488
- #endif // OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_