triton-windows 3.3.1.post19__cp313-cp313-win_amd64.whl → 3.5.0.post21__cp313-cp313-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. More details are linked from the original page.

Files changed (225)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +11 -2
  3. triton/_filecheck.py +97 -0
  4. triton/_internal_testing.py +95 -18
  5. triton/_utils.py +112 -21
  6. triton/backends/__init__.py +20 -23
  7. triton/backends/amd/__init__.py +0 -0
  8. triton/backends/amd/compiler.py +161 -119
  9. triton/backends/amd/driver.c +118 -46
  10. triton/backends/amd/driver.py +274 -96
  11. triton/backends/compiler.py +7 -21
  12. triton/backends/driver.py +13 -0
  13. triton/backends/nvidia/bin/ptxas.exe +0 -0
  14. triton/backends/nvidia/compiler.py +163 -106
  15. triton/backends/nvidia/driver.c +166 -101
  16. triton/backends/nvidia/driver.py +384 -202
  17. triton/compiler/__init__.py +5 -2
  18. triton/compiler/code_generator.py +439 -231
  19. triton/compiler/compiler.py +152 -84
  20. triton/experimental/__init__.py +0 -0
  21. triton/experimental/gluon/__init__.py +5 -0
  22. triton/experimental/gluon/_compiler.py +0 -0
  23. triton/experimental/gluon/_runtime.py +102 -0
  24. triton/experimental/gluon/language/__init__.py +119 -0
  25. triton/experimental/gluon/language/_core.py +490 -0
  26. triton/experimental/gluon/language/_layouts.py +583 -0
  27. triton/experimental/gluon/language/_math.py +20 -0
  28. triton/experimental/gluon/language/_semantic.py +380 -0
  29. triton/experimental/gluon/language/_standard.py +80 -0
  30. triton/experimental/gluon/language/amd/__init__.py +4 -0
  31. triton/experimental/gluon/language/amd/_layouts.py +96 -0
  32. triton/experimental/gluon/language/amd/cdna3/__init__.py +100 -0
  33. triton/experimental/gluon/language/amd/cdna4/__init__.py +48 -0
  34. triton/experimental/gluon/language/amd/cdna4/async_copy.py +151 -0
  35. triton/experimental/gluon/language/extra/__init__.py +3 -0
  36. triton/experimental/gluon/language/nvidia/__init__.py +4 -0
  37. triton/experimental/gluon/language/nvidia/ampere/__init__.py +3 -0
  38. triton/experimental/gluon/language/nvidia/ampere/async_copy.py +74 -0
  39. triton/experimental/gluon/language/nvidia/ampere/mbarrier.py +80 -0
  40. triton/experimental/gluon/language/nvidia/blackwell/__init__.py +387 -0
  41. triton/experimental/gluon/language/nvidia/blackwell/tma.py +52 -0
  42. triton/experimental/gluon/language/nvidia/hopper/__init__.py +132 -0
  43. triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +34 -0
  44. triton/experimental/gluon/language/nvidia/hopper/tma.py +97 -0
  45. triton/experimental/gluon/nvidia/__init__.py +4 -0
  46. triton/experimental/gluon/nvidia/blackwell.py +3 -0
  47. triton/experimental/gluon/nvidia/hopper.py +45 -0
  48. triton/knobs.py +546 -0
  49. triton/language/__init__.py +50 -19
  50. triton/language/core.py +909 -572
  51. triton/language/extra/cuda/__init__.py +10 -7
  52. triton/language/extra/cuda/gdc.py +42 -0
  53. triton/language/extra/cuda/libdevice.py +394 -394
  54. triton/language/extra/cuda/utils.py +21 -21
  55. triton/language/extra/hip/__init__.py +3 -1
  56. triton/language/extra/hip/libdevice.py +120 -104
  57. triton/language/extra/hip/utils.py +35 -0
  58. triton/language/extra/libdevice.py +4 -0
  59. triton/language/math.py +65 -66
  60. triton/language/random.py +12 -2
  61. triton/language/semantic.py +1757 -1768
  62. triton/language/standard.py +127 -62
  63. triton/language/target_info.py +54 -0
  64. triton/runtime/_allocation.py +15 -3
  65. triton/runtime/_async_compile.py +55 -0
  66. triton/runtime/autotuner.py +117 -60
  67. triton/runtime/build.py +83 -17
  68. triton/runtime/cache.py +61 -47
  69. triton/runtime/driver.py +25 -47
  70. triton/runtime/interpreter.py +95 -50
  71. triton/runtime/jit.py +445 -248
  72. triton/runtime/tcc/include/_mingw.h +8 -10
  73. triton/runtime/tcc/include/assert.h +5 -0
  74. triton/runtime/tcc/include/errno.h +1 -1
  75. triton/runtime/tcc/include/float.h +21 -3
  76. triton/runtime/tcc/include/iso646.h +36 -0
  77. triton/runtime/tcc/include/limits.h +5 -0
  78. triton/runtime/tcc/include/malloc.h +2 -2
  79. triton/runtime/tcc/include/math.h +21 -261
  80. triton/runtime/tcc/include/stdalign.h +16 -0
  81. triton/runtime/tcc/include/stdarg.h +5 -70
  82. triton/runtime/tcc/include/stdatomic.h +171 -0
  83. triton/runtime/tcc/include/stddef.h +7 -19
  84. triton/runtime/tcc/include/stdlib.h +15 -4
  85. triton/runtime/tcc/include/stdnoreturn.h +7 -0
  86. triton/runtime/tcc/include/sys/stat.h +2 -2
  87. triton/runtime/tcc/include/sys/types.h +5 -0
  88. triton/runtime/tcc/include/tcc/tcc_libm.h +444 -27
  89. triton/runtime/tcc/include/tccdefs.h +342 -0
  90. triton/runtime/tcc/include/tgmath.h +89 -0
  91. triton/runtime/tcc/include/uchar.h +33 -0
  92. triton/runtime/tcc/include/unistd.h +1 -0
  93. triton/runtime/tcc/include/winapi/qos.h +72 -0
  94. triton/runtime/tcc/include/winapi/shellapi.h +59 -0
  95. triton/runtime/tcc/include/winapi/winbase.h +9 -2
  96. triton/runtime/tcc/include/winapi/wincon.h +8 -0
  97. triton/runtime/tcc/include/winapi/windows.h +1 -1
  98. triton/runtime/tcc/include/winapi/winnls.h +778 -0
  99. triton/runtime/tcc/include/winapi/winnt.h +9 -7
  100. triton/runtime/tcc/include/winapi/winsock2.h +1474 -0
  101. triton/runtime/tcc/include/winapi/ws2ipdef.h +21 -0
  102. triton/runtime/tcc/include/winapi/ws2tcpip.h +391 -0
  103. triton/runtime/tcc/lib/libtcc1.a +0 -0
  104. triton/runtime/tcc/lib/python314.def +1800 -0
  105. triton/runtime/tcc/lib/python314t.def +1809 -0
  106. triton/runtime/tcc/libtcc.dll +0 -0
  107. triton/runtime/tcc/tcc.exe +0 -0
  108. triton/testing.py +16 -12
  109. triton/tools/compile.py +62 -14
  110. triton/tools/disasm.py +3 -4
  111. triton/tools/extra/cuda/compile.c +1 -0
  112. triton/tools/extra/hip/compile.cpp +66 -0
  113. triton/tools/extra/hip/compile.h +13 -0
  114. triton/tools/ragged_tma.py +92 -0
  115. triton/tools/tensor_descriptor.py +34 -0
  116. triton/windows_utils.py +52 -81
  117. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/METADATA +8 -4
  118. triton_windows-3.5.0.post21.dist-info/RECORD +217 -0
  119. triton_windows-3.5.0.post21.dist-info/entry_points.txt +3 -0
  120. triton_windows-3.5.0.post21.dist-info/licenses/LICENSE +23 -0
  121. triton_windows-3.5.0.post21.dist-info/top_level.txt +1 -0
  122. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  123. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
  124. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
  125. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
  126. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  127. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  128. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  129. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
  130. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  131. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
  132. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  133. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  134. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  135. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  136. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  137. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  138. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  139. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  140. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
  141. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
  142. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  143. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  144. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  145. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  146. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  147. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  148. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  149. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
  150. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  151. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  152. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  153. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  154. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  155. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
  156. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
  157. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
  158. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  159. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  160. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  161. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  162. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  163. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  164. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  165. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  166. triton/backends/amd/include/hip/device_functions.h +0 -38
  167. triton/backends/amd/include/hip/driver_types.h +0 -468
  168. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  169. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  170. triton/backends/amd/include/hip/hip_common.h +0 -100
  171. triton/backends/amd/include/hip/hip_complex.h +0 -38
  172. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  173. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  174. triton/backends/amd/include/hip/hip_ext.h +0 -161
  175. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  176. triton/backends/amd/include/hip/hip_fp8.h +0 -33
  177. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  178. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  179. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  180. triton/backends/amd/include/hip/hip_profile.h +0 -27
  181. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  182. triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
  183. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  184. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  185. triton/backends/amd/include/hip/hip_version.h +0 -17
  186. triton/backends/amd/include/hip/hiprtc.h +0 -421
  187. triton/backends/amd/include/hip/library_types.h +0 -78
  188. triton/backends/amd/include/hip/math_functions.h +0 -42
  189. triton/backends/amd/include/hip/surface_types.h +0 -63
  190. triton/backends/amd/include/hip/texture_types.h +0 -194
  191. triton/backends/amd/include/hsa/Brig.h +0 -1131
  192. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  193. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
  194. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  195. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  196. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  197. triton/backends/amd/include/hsa/hsa.h +0 -5738
  198. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  199. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
  200. triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
  201. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
  202. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  203. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  204. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  205. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  206. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
  207. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  208. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
  209. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
  210. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
  211. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  212. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  213. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  214. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  215. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  216. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  217. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  218. triton/backends/amd/include/roctracer/roctx.h +0 -229
  219. triton/language/_utils.py +0 -21
  220. triton/language/extra/cuda/_experimental_tma.py +0 -106
  221. triton/runtime/tcc/lib/libtcc1-64.a +0 -0
  222. triton/tools/experimental_descriptor.py +0 -32
  223. triton_windows-3.3.1.post19.dist-info/RECORD +0 -260
  224. triton_windows-3.3.1.post19.dist-info/top_level.txt +0 -14
  225. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/WHEEL +0 -0
@@ -1,91 +0,0 @@
1
- #ifndef HSA_RUNTIME_AMD_TOOL_EVENTS_H_
2
- #define HSA_RUNTIME_AMD_TOOL_EVENTS_H_
3
-
4
- // Insert license header
5
-
6
- #include <stddef.h>
7
- #include <stdint.h>
8
- #include "hsa.h"
9
-
10
-
11
- typedef enum {
12
- HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE = 0,
13
- HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE =
14
- (1 << 0), // This scratch allocation is only valid for 1 dispatch.
15
- HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT =
16
- (1 << 1), // Used alternate scratch instead of main scratch
17
- } hsa_amd_event_scratch_alloc_flag_t;
18
-
19
- typedef enum {
20
- HSA_AMD_TOOL_EVENT_MIN = 0,
21
-
22
- // Scratch memory tracking
23
- HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_START,
24
- HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_END,
25
- HSA_AMD_TOOL_EVENT_SCRATCH_FREE_START,
26
- HSA_AMD_TOOL_EVENT_SCRATCH_FREE_END,
27
- HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_START,
28
- HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_END,
29
-
30
- // Add new events above ^
31
- HSA_AMD_TOOL_EVENT_MAX
32
- } hsa_amd_tool_event_kind_t;
33
-
34
- typedef struct {
35
- hsa_amd_tool_event_kind_t kind;
36
- } hsa_amd_tool_event_none_t;
37
-
38
- typedef struct {
39
- hsa_amd_tool_event_kind_t kind;
40
- const hsa_queue_t* queue;
41
- hsa_amd_event_scratch_alloc_flag_t flags;
42
- uint64_t dispatch_id; // Dispatch ID of the AQL packet that needs more scratch memory
43
- } hsa_amd_event_scratch_alloc_start_t;
44
-
45
- typedef struct {
46
- hsa_amd_tool_event_kind_t kind;
47
- const hsa_queue_t* queue;
48
- hsa_amd_event_scratch_alloc_flag_t flags;
49
- uint64_t dispatch_id; // Dispatch ID of the AQL packet that needs more scratch memory
50
- size_t size; // Amount of scratch allocated - in bytes
51
- size_t num_slots; // limit of number of waves
52
- } hsa_amd_event_scratch_alloc_end_t;
53
-
54
- typedef struct {
55
- hsa_amd_tool_event_kind_t kind;
56
- const hsa_queue_t* queue;
57
- hsa_amd_event_scratch_alloc_flag_t flags;
58
- } hsa_amd_event_scratch_free_start_t;
59
-
60
- typedef struct {
61
- hsa_amd_tool_event_kind_t kind;
62
- const hsa_queue_t* queue;
63
- hsa_amd_event_scratch_alloc_flag_t flags;
64
- } hsa_amd_event_scratch_free_end_t;
65
-
66
- typedef struct {
67
- hsa_amd_tool_event_kind_t kind;
68
- const hsa_queue_t* queue;
69
- hsa_amd_event_scratch_alloc_flag_t flags;
70
- } hsa_amd_event_scratch_async_reclaim_start_t;
71
-
72
- typedef struct {
73
- hsa_amd_tool_event_kind_t kind;
74
- const hsa_queue_t* queue;
75
- hsa_amd_event_scratch_alloc_flag_t flags;
76
- } hsa_amd_event_scratch_async_reclaim_end_t;
77
-
78
- typedef union {
79
- const hsa_amd_tool_event_none_t* none;
80
- const hsa_amd_event_scratch_alloc_start_t* scratch_alloc_start;
81
- const hsa_amd_event_scratch_alloc_end_t* scratch_alloc_end;
82
- const hsa_amd_event_scratch_free_start_t* scratch_free_start;
83
- const hsa_amd_event_scratch_free_end_t* scratch_free_end;
84
- const hsa_amd_event_scratch_async_reclaim_start_t* scratch_async_reclaim_start;
85
- const hsa_amd_event_scratch_async_reclaim_end_t* scratch_async_reclaim_end;
86
- } hsa_amd_tool_event_t;
87
-
88
- typedef hsa_status_t (*hsa_amd_tool_event)(hsa_amd_tool_event_t);
89
-
90
-
91
- #endif
@@ -1,579 +0,0 @@
1
- ////////////////////////////////////////////////////////////////////////////////
2
- //
3
- // The University of Illinois/NCSA
4
- // Open Source License (NCSA)
5
- //
6
- // Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
7
- //
8
- // Developed by:
9
- //
10
- // AMD Research and AMD HSA Software Development
11
- //
12
- // Advanced Micro Devices, Inc.
13
- //
14
- // www.amd.com
15
- //
16
- // Permission is hereby granted, free of charge, to any person obtaining a copy
17
- // of this software and associated documentation files (the "Software"), to
18
- // deal with the Software without restriction, including without limitation
19
- // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20
- // and/or sell copies of the Software, and to permit persons to whom the
21
- // Software is furnished to do so, subject to the following conditions:
22
- //
23
- // - Redistributions of source code must retain the above copyright notice,
24
- // this list of conditions and the following disclaimers.
25
- // - Redistributions in binary form must reproduce the above copyright
26
- // notice, this list of conditions and the following disclaimers in
27
- // the documentation and/or other materials provided with the distribution.
28
- // - Neither the names of Advanced Micro Devices, Inc,
29
- // nor the names of its contributors may be used to endorse or promote
30
- // products derived from this Software without specific prior written
31
- // permission.
32
- //
33
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36
- // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37
- // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38
- // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39
- // DEALINGS WITH THE SOFTWARE.
40
- //
41
- ////////////////////////////////////////////////////////////////////////////////
42
-
43
- #ifndef HSA_RUNTIME_INC_HSA_API_TRACE_H
44
- #define HSA_RUNTIME_INC_HSA_API_TRACE_H
45
-
46
- #include "hsa.h"
47
- #include "hsa_api_trace_version.h"
48
- #ifdef AMD_INTERNAL_BUILD
49
- #include "hsa_ext_image.h"
50
- #include "hsa_ext_amd.h"
51
- #include "hsa_ext_finalize.h"
52
- #include "hsa_amd_tool.h"
53
- #include "hsa_ven_amd_pc_sampling.h"
54
- #else
55
- #include "inc/hsa_ext_image.h"
56
- #include "inc/hsa_ext_amd.h"
57
- #include "inc/hsa_ext_finalize.h"
58
- #include "inc/hsa_amd_tool.h"
59
- #include "inc/hsa_ven_amd_pc_sampling.h"
60
- #endif
61
-
62
- #include <string.h>
63
- #include <assert.h>
64
- #include <stddef.h>
65
-
66
- // Table MAJOR_VERSION and STEP_VERSION defines have moved to hsa_api_trace_version.h
67
-
68
- // Min function used to copy Api Tables
69
- static inline uint32_t Min(const uint32_t a, const uint32_t b) {
70
- return (a > b) ? b : a;
71
- }
72
-
73
- // Declarations of APIs intended for use only by tools.
74
-
75
- // An AQL packet that can be put in an intercept queue to cause a callback to
76
- // be invoked when the packet is about to be submitted to the underlying
77
- // hardware queue. These packets are not copied to the underlying hardware
78
- // queue. These packets should come immediately before the regular AQL packet
79
- // they relate to. This implies that packet rewriters should always keep these
80
- // packets adjacent to the regular AQL packet that follows them.
81
- const uint32_t AMD_AQL_FORMAT_INTERCEPT_MARKER = 0xFE;
82
-
83
- struct amd_aql_intercept_marker_s;
84
-
85
- // When an intercept queue is processing rewritten packets to put them on the
86
- // underlying hardware queue, if it encounters a
87
- // AMD_AQL_FORMAT_INTERCEPT_MARKER vendor AQL packet it will call the following
88
- // handler. packet points to the packet, queue is the underlying hardware
89
- // queue, and packet_id is the packet id of the next packet to be put on the
90
- // underlying hardware queue. The intercept queue does not put these packets
91
- // onto the underlying hardware queue.
92
- typedef void (*amd_intercept_marker_handler)(const struct amd_aql_intercept_marker_s* packet,
93
- hsa_queue_t* queue, uint64_t packet_id);
94
- // An AQL vendor packet used by the intercept queue to mark the following
95
- // packet. The callback will be invoked to allow a tool to know where in the
96
- // underlying hardware queue the following packet will be placed. user_data can
97
- // be used to hold any data useful to the tool.
98
- typedef struct amd_aql_intercept_marker_s {
99
- uint16_t header; // Must have a packet type of HSA_PACKET_TYPE_VENDOR_SPECIFIC.
100
- uint8_t format; // Must be AMD_AQL_FORMAT_INTERCEPT_MARKER.
101
- uint8_t reserved[5]; // Must be 0.
102
- #ifdef HSA_LARGE_MODEL
103
- amd_intercept_marker_handler callback;
104
- #elif defined HSA_LITTLE_ENDIAN
105
- amd_intercept_marker_handler callback;
106
- uint32_t reserved1; // Must be 0.
107
- #else
108
- uint32_t reserved1; // Must be 0.
109
- amd_intercept_marker_handler callback;
110
- #endif
111
- uint64_t user_data[6];
112
- } amd_aql_intercept_marker_t;
113
-
114
- typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* pkts, uint64_t pkt_count);
115
- typedef void (*hsa_amd_queue_intercept_handler)(const void* pkts, uint64_t pkt_count,
116
- uint64_t user_pkt_index, void* data,
117
- hsa_amd_queue_intercept_packet_writer writer);
118
- hsa_status_t hsa_amd_queue_intercept_register(hsa_queue_t* queue,
119
- hsa_amd_queue_intercept_handler callback,
120
- void* user_data);
121
- hsa_status_t hsa_amd_queue_intercept_create(
122
- hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
123
- void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
124
- uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
125
-
126
- typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t* queue, hsa_agent_t agent,
127
- void* data);
128
- hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifier callback,
129
- void* user_data);
130
-
131
- // Structure of Version used to identify an instance of Api table
132
- // Must be the first member (offsetof == 0) of all API tables.
133
- // This is the root of the table passing ABI.
134
- struct ApiTableVersion {
135
- uint32_t major_id;
136
- uint32_t minor_id;
137
- uint32_t step_id;
138
- uint32_t reserved;
139
- };
140
-
141
- struct ToolsApiTable {
142
- ApiTableVersion version;
143
-
144
- hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_start_fn;
145
- hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_end_fn;
146
- hsa_amd_tool_event hsa_amd_tool_scratch_event_free_start_fn;
147
- hsa_amd_tool_event hsa_amd_tool_scratch_event_free_end_fn;
148
- hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_start_fn;
149
- hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_end_fn;
150
- };
151
-
152
- // Table to export HSA Finalizer Extension Apis
153
- struct FinalizerExtTable {
154
- ApiTableVersion version;
155
- decltype(hsa_ext_program_create)* hsa_ext_program_create_fn;
156
- decltype(hsa_ext_program_destroy)* hsa_ext_program_destroy_fn;
157
- decltype(hsa_ext_program_add_module)* hsa_ext_program_add_module_fn;
158
- decltype(hsa_ext_program_iterate_modules)* hsa_ext_program_iterate_modules_fn;
159
- decltype(hsa_ext_program_get_info)* hsa_ext_program_get_info_fn;
160
- decltype(hsa_ext_program_finalize)* hsa_ext_program_finalize_fn;
161
- };
162
-
163
- // Table to export HSA Image Extension Apis
164
- struct ImageExtTable {
165
- ApiTableVersion version;
166
- decltype(hsa_ext_image_get_capability)* hsa_ext_image_get_capability_fn;
167
- decltype(hsa_ext_image_data_get_info)* hsa_ext_image_data_get_info_fn;
168
- decltype(hsa_ext_image_create)* hsa_ext_image_create_fn;
169
- decltype(hsa_ext_image_import)* hsa_ext_image_import_fn;
170
- decltype(hsa_ext_image_export)* hsa_ext_image_export_fn;
171
- decltype(hsa_ext_image_copy)* hsa_ext_image_copy_fn;
172
- decltype(hsa_ext_image_clear)* hsa_ext_image_clear_fn;
173
- decltype(hsa_ext_image_destroy)* hsa_ext_image_destroy_fn;
174
- decltype(hsa_ext_sampler_create)* hsa_ext_sampler_create_fn;
175
- decltype(hsa_ext_sampler_destroy)* hsa_ext_sampler_destroy_fn;
176
- decltype(hsa_ext_image_get_capability_with_layout)* hsa_ext_image_get_capability_with_layout_fn;
177
- decltype(hsa_ext_image_data_get_info_with_layout)* hsa_ext_image_data_get_info_with_layout_fn;
178
- decltype(hsa_ext_image_create_with_layout)* hsa_ext_image_create_with_layout_fn;
179
- };
180
-
181
- // Table to export HSA PC Sampling Extension Apis
182
- struct PcSamplingExtTable {
183
- ApiTableVersion version;
184
- decltype(hsa_ven_amd_pcs_iterate_configuration)* hsa_ven_amd_pcs_iterate_configuration_fn;
185
- decltype(hsa_ven_amd_pcs_create)* hsa_ven_amd_pcs_create_fn;
186
- decltype(hsa_ven_amd_pcs_create_from_id)* hsa_ven_amd_pcs_create_from_id_fn;
187
- decltype(hsa_ven_amd_pcs_destroy)* hsa_ven_amd_pcs_destroy_fn;
188
- decltype(hsa_ven_amd_pcs_start)* hsa_ven_amd_pcs_start_fn;
189
- decltype(hsa_ven_amd_pcs_stop)* hsa_ven_amd_pcs_stop_fn;
190
- decltype(hsa_ven_amd_pcs_flush)* hsa_ven_amd_pcs_flush_fn;
191
- };
192
-
193
-
194
- // Table to export AMD Extension Apis
195
- struct AmdExtTable {
196
- ApiTableVersion version;
197
- decltype(hsa_amd_coherency_get_type)* hsa_amd_coherency_get_type_fn;
198
- decltype(hsa_amd_coherency_set_type)* hsa_amd_coherency_set_type_fn;
199
- decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled_fn;
200
- decltype(hsa_amd_profiling_async_copy_enable) *hsa_amd_profiling_async_copy_enable_fn;
201
- decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time_fn;
202
- decltype(hsa_amd_profiling_get_async_copy_time) *hsa_amd_profiling_get_async_copy_time_fn;
203
- decltype(hsa_amd_profiling_convert_tick_to_system_domain)* hsa_amd_profiling_convert_tick_to_system_domain_fn;
204
- decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler_fn;
205
- decltype(hsa_amd_async_function)* hsa_amd_async_function_fn;
206
- decltype(hsa_amd_signal_wait_any)* hsa_amd_signal_wait_any_fn;
207
- decltype(hsa_amd_queue_cu_set_mask)* hsa_amd_queue_cu_set_mask_fn;
208
- decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info_fn;
209
- decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools_fn;
210
- decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate_fn;
211
- decltype(hsa_amd_memory_pool_free)* hsa_amd_memory_pool_free_fn;
212
- decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
213
- decltype(hsa_amd_memory_async_copy_on_engine)* hsa_amd_memory_async_copy_on_engine_fn;
214
- decltype(hsa_amd_memory_copy_engine_status)* hsa_amd_memory_copy_engine_status_fn;
215
- decltype(hsa_amd_agent_memory_pool_get_info)* hsa_amd_agent_memory_pool_get_info_fn;
216
- decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access_fn;
217
- decltype(hsa_amd_memory_pool_can_migrate)* hsa_amd_memory_pool_can_migrate_fn;
218
- decltype(hsa_amd_memory_migrate)* hsa_amd_memory_migrate_fn;
219
- decltype(hsa_amd_memory_lock)* hsa_amd_memory_lock_fn;
220
- decltype(hsa_amd_memory_unlock)* hsa_amd_memory_unlock_fn;
221
- decltype(hsa_amd_memory_fill)* hsa_amd_memory_fill_fn;
222
- decltype(hsa_amd_interop_map_buffer)* hsa_amd_interop_map_buffer_fn;
223
- decltype(hsa_amd_interop_unmap_buffer)* hsa_amd_interop_unmap_buffer_fn;
224
- decltype(hsa_amd_image_create)* hsa_amd_image_create_fn;
225
- decltype(hsa_amd_pointer_info)* hsa_amd_pointer_info_fn;
226
- decltype(hsa_amd_pointer_info_set_userdata)* hsa_amd_pointer_info_set_userdata_fn;
227
- decltype(hsa_amd_ipc_memory_create)* hsa_amd_ipc_memory_create_fn;
228
- decltype(hsa_amd_ipc_memory_attach)* hsa_amd_ipc_memory_attach_fn;
229
- decltype(hsa_amd_ipc_memory_detach)* hsa_amd_ipc_memory_detach_fn;
230
- decltype(hsa_amd_signal_create)* hsa_amd_signal_create_fn;
231
- decltype(hsa_amd_ipc_signal_create)* hsa_amd_ipc_signal_create_fn;
232
- decltype(hsa_amd_ipc_signal_attach)* hsa_amd_ipc_signal_attach_fn;
233
- decltype(hsa_amd_register_system_event_handler)* hsa_amd_register_system_event_handler_fn;
234
- decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
235
- decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
236
- decltype(hsa_amd_queue_set_priority)* hsa_amd_queue_set_priority_fn;
237
- decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
238
- decltype(hsa_amd_runtime_queue_create_register)* hsa_amd_runtime_queue_create_register_fn;
239
- decltype(hsa_amd_memory_lock_to_pool)* hsa_amd_memory_lock_to_pool_fn;
240
- decltype(hsa_amd_register_deallocation_callback)* hsa_amd_register_deallocation_callback_fn;
241
- decltype(hsa_amd_deregister_deallocation_callback)* hsa_amd_deregister_deallocation_callback_fn;
242
- decltype(hsa_amd_signal_value_pointer)* hsa_amd_signal_value_pointer_fn;
243
- decltype(hsa_amd_svm_attributes_set)* hsa_amd_svm_attributes_set_fn;
244
- decltype(hsa_amd_svm_attributes_get)* hsa_amd_svm_attributes_get_fn;
245
- decltype(hsa_amd_svm_prefetch_async)* hsa_amd_svm_prefetch_async_fn;
246
- decltype(hsa_amd_spm_acquire)* hsa_amd_spm_acquire_fn;
247
- decltype(hsa_amd_spm_release)* hsa_amd_spm_release_fn;
248
- decltype(hsa_amd_spm_set_dest_buffer)* hsa_amd_spm_set_dest_buffer_fn;
249
- decltype(hsa_amd_queue_cu_get_mask)* hsa_amd_queue_cu_get_mask_fn;
250
- decltype(hsa_amd_portable_export_dmabuf)* hsa_amd_portable_export_dmabuf_fn;
251
- decltype(hsa_amd_portable_close_dmabuf)* hsa_amd_portable_close_dmabuf_fn;
252
- decltype(hsa_amd_vmem_address_reserve)* hsa_amd_vmem_address_reserve_fn;
253
- decltype(hsa_amd_vmem_address_free)* hsa_amd_vmem_address_free_fn;
254
- decltype(hsa_amd_vmem_handle_create)* hsa_amd_vmem_handle_create_fn;
255
- decltype(hsa_amd_vmem_handle_release)* hsa_amd_vmem_handle_release_fn;
256
- decltype(hsa_amd_vmem_map)* hsa_amd_vmem_map_fn;
257
- decltype(hsa_amd_vmem_unmap)* hsa_amd_vmem_unmap_fn;
258
- decltype(hsa_amd_vmem_set_access)* hsa_amd_vmem_set_access_fn;
259
- decltype(hsa_amd_vmem_get_access)* hsa_amd_vmem_get_access_fn;
260
- decltype(hsa_amd_vmem_export_shareable_handle)* hsa_amd_vmem_export_shareable_handle_fn;
261
- decltype(hsa_amd_vmem_import_shareable_handle)* hsa_amd_vmem_import_shareable_handle_fn;
262
- decltype(hsa_amd_vmem_retain_alloc_handle)* hsa_amd_vmem_retain_alloc_handle_fn;
263
- decltype(hsa_amd_vmem_get_alloc_properties_from_handle)*
264
- hsa_amd_vmem_get_alloc_properties_from_handle_fn;
265
- decltype(hsa_amd_agent_set_async_scratch_limit)* hsa_amd_agent_set_async_scratch_limit_fn;
266
- decltype(hsa_amd_queue_get_info)* hsa_amd_queue_get_info_fn;
267
- decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn;
268
- };
269
-
270
- // Table to export HSA Core Runtime Apis
271
- struct CoreApiTable {
272
- ApiTableVersion version;
273
- decltype(hsa_init)* hsa_init_fn;
274
- decltype(hsa_shut_down)* hsa_shut_down_fn;
275
- decltype(hsa_system_get_info)* hsa_system_get_info_fn;
276
- decltype(hsa_system_extension_supported)* hsa_system_extension_supported_fn;
277
- decltype(hsa_system_get_extension_table)* hsa_system_get_extension_table_fn;
278
- decltype(hsa_iterate_agents)* hsa_iterate_agents_fn;
279
- decltype(hsa_agent_get_info)* hsa_agent_get_info_fn;
280
- decltype(hsa_queue_create)* hsa_queue_create_fn;
281
- decltype(hsa_soft_queue_create)* hsa_soft_queue_create_fn;
282
- decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
283
- decltype(hsa_queue_inactivate)* hsa_queue_inactivate_fn;
284
- decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
285
- decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
286
- decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
287
- decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
288
- decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
289
- decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
290
- decltype(hsa_queue_cas_write_index_scacq_screl)* hsa_queue_cas_write_index_scacq_screl_fn;
291
- decltype(hsa_queue_cas_write_index_scacquire)* hsa_queue_cas_write_index_scacquire_fn;
292
- decltype(hsa_queue_cas_write_index_relaxed)* hsa_queue_cas_write_index_relaxed_fn;
293
- decltype(hsa_queue_cas_write_index_screlease)* hsa_queue_cas_write_index_screlease_fn;
294
- decltype(hsa_queue_add_write_index_scacq_screl)* hsa_queue_add_write_index_scacq_screl_fn;
295
- decltype(hsa_queue_add_write_index_scacquire)* hsa_queue_add_write_index_scacquire_fn;
296
- decltype(hsa_queue_add_write_index_relaxed)* hsa_queue_add_write_index_relaxed_fn;
297
- decltype(hsa_queue_add_write_index_screlease)* hsa_queue_add_write_index_screlease_fn;
298
- decltype(hsa_queue_store_read_index_relaxed)* hsa_queue_store_read_index_relaxed_fn;
299
- decltype(hsa_queue_store_read_index_screlease)* hsa_queue_store_read_index_screlease_fn;
300
- decltype(hsa_agent_iterate_regions)* hsa_agent_iterate_regions_fn;
301
- decltype(hsa_region_get_info)* hsa_region_get_info_fn;
302
- decltype(hsa_agent_get_exception_policies)* hsa_agent_get_exception_policies_fn;
303
- decltype(hsa_agent_extension_supported)* hsa_agent_extension_supported_fn;
304
- decltype(hsa_memory_register)* hsa_memory_register_fn;
305
- decltype(hsa_memory_deregister)* hsa_memory_deregister_fn;
306
- decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
307
- decltype(hsa_memory_free)* hsa_memory_free_fn;
308
- decltype(hsa_memory_copy)* hsa_memory_copy_fn;
309
- decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;
310
- decltype(hsa_signal_create)* hsa_signal_create_fn;
311
- decltype(hsa_signal_destroy)* hsa_signal_destroy_fn;
312
- decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed_fn;
313
- decltype(hsa_signal_load_scacquire)* hsa_signal_load_scacquire_fn;
314
- decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
315
- decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease_fn;
316
- decltype(hsa_signal_wait_relaxed)* hsa_signal_wait_relaxed_fn;
317
- decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire_fn;
318
- decltype(hsa_signal_and_relaxed)* hsa_signal_and_relaxed_fn;
319
- decltype(hsa_signal_and_scacquire)* hsa_signal_and_scacquire_fn;
320
- decltype(hsa_signal_and_screlease)* hsa_signal_and_screlease_fn;
321
- decltype(hsa_signal_and_scacq_screl)* hsa_signal_and_scacq_screl_fn;
322
- decltype(hsa_signal_or_relaxed)* hsa_signal_or_relaxed_fn;
323
- decltype(hsa_signal_or_scacquire)* hsa_signal_or_scacquire_fn;
324
- decltype(hsa_signal_or_screlease)* hsa_signal_or_screlease_fn;
325
- decltype(hsa_signal_or_scacq_screl)* hsa_signal_or_scacq_screl_fn;
326
- decltype(hsa_signal_xor_relaxed)* hsa_signal_xor_relaxed_fn;
327
- decltype(hsa_signal_xor_scacquire)* hsa_signal_xor_scacquire_fn;
328
- decltype(hsa_signal_xor_screlease)* hsa_signal_xor_screlease_fn;
329
- decltype(hsa_signal_xor_scacq_screl)* hsa_signal_xor_scacq_screl_fn;
330
- decltype(hsa_signal_exchange_relaxed)* hsa_signal_exchange_relaxed_fn;
331
- decltype(hsa_signal_exchange_scacquire)* hsa_signal_exchange_scacquire_fn;
332
- decltype(hsa_signal_exchange_screlease)* hsa_signal_exchange_screlease_fn;
333
- decltype(hsa_signal_exchange_scacq_screl)* hsa_signal_exchange_scacq_screl_fn;
334
- decltype(hsa_signal_add_relaxed)* hsa_signal_add_relaxed_fn;
335
- decltype(hsa_signal_add_scacquire)* hsa_signal_add_scacquire_fn;
336
- decltype(hsa_signal_add_screlease)* hsa_signal_add_screlease_fn;
337
- decltype(hsa_signal_add_scacq_screl)* hsa_signal_add_scacq_screl_fn;
338
- decltype(hsa_signal_subtract_relaxed)* hsa_signal_subtract_relaxed_fn;
339
- decltype(hsa_signal_subtract_scacquire)* hsa_signal_subtract_scacquire_fn;
340
- decltype(hsa_signal_subtract_screlease)* hsa_signal_subtract_screlease_fn;
341
- decltype(hsa_signal_subtract_scacq_screl)* hsa_signal_subtract_scacq_screl_fn;
342
- decltype(hsa_signal_cas_relaxed)* hsa_signal_cas_relaxed_fn;
343
- decltype(hsa_signal_cas_scacquire)* hsa_signal_cas_scacquire_fn;
344
- decltype(hsa_signal_cas_screlease)* hsa_signal_cas_screlease_fn;
345
- decltype(hsa_signal_cas_scacq_screl)* hsa_signal_cas_scacq_screl_fn;
346
-
347
- //===--- Instruction Set Architecture -----------------------------------===//
348
-
349
- decltype(hsa_isa_from_name)* hsa_isa_from_name_fn;
350
- // Deprecated since v1.1.
351
- decltype(hsa_isa_get_info)* hsa_isa_get_info_fn;
352
- // Deprecated since v1.1.
353
- decltype(hsa_isa_compatible)* hsa_isa_compatible_fn;
354
-
355
- //===--- Code Objects (deprecated) --------------------------------------===//
356
-
357
- // Deprecated since v1.1.
358
- decltype(hsa_code_object_serialize)* hsa_code_object_serialize_fn;
359
- // Deprecated since v1.1.
360
- decltype(hsa_code_object_deserialize)* hsa_code_object_deserialize_fn;
361
- // Deprecated since v1.1.
362
- decltype(hsa_code_object_destroy)* hsa_code_object_destroy_fn;
363
- // Deprecated since v1.1.
364
- decltype(hsa_code_object_get_info)* hsa_code_object_get_info_fn;
365
- // Deprecated since v1.1.
366
- decltype(hsa_code_object_get_symbol)* hsa_code_object_get_symbol_fn;
367
- // Deprecated since v1.1.
368
- decltype(hsa_code_symbol_get_info)* hsa_code_symbol_get_info_fn;
369
- // Deprecated since v1.1.
370
- decltype(hsa_code_object_iterate_symbols)* hsa_code_object_iterate_symbols_fn;
371
-
372
- //===--- Executable -----------------------------------------------------===//
373
-
374
- // Deprecated since v1.1.
375
- decltype(hsa_executable_create)* hsa_executable_create_fn;
376
- decltype(hsa_executable_destroy)* hsa_executable_destroy_fn;
377
- // Deprecated since v1.1.
378
- decltype(hsa_executable_load_code_object)* hsa_executable_load_code_object_fn;
379
- decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
380
- decltype(hsa_executable_get_info)* hsa_executable_get_info_fn;
381
- decltype(hsa_executable_global_variable_define)*
382
- hsa_executable_global_variable_define_fn;
383
- decltype(hsa_executable_agent_global_variable_define)*
384
- hsa_executable_agent_global_variable_define_fn;
385
- decltype(hsa_executable_readonly_variable_define)*
386
- hsa_executable_readonly_variable_define_fn;
387
- decltype(hsa_executable_validate)* hsa_executable_validate_fn;
388
- // Deprecated since v1.1.
389
- decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol_fn;
390
- decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info_fn;
391
- // Deprecated since v1.1.
392
- decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols_fn;
393
-
394
- //===--- Runtime Notifications ------------------------------------------===//
395
-
396
- decltype(hsa_status_string)* hsa_status_string_fn;
397
-
398
- // Start HSA v1.1 additions
399
- decltype(hsa_extension_get_name)* hsa_extension_get_name_fn;
400
- decltype(hsa_system_major_extension_supported)* hsa_system_major_extension_supported_fn;
401
- decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table_fn;
402
- decltype(hsa_agent_major_extension_supported)* hsa_agent_major_extension_supported_fn;
403
- decltype(hsa_cache_get_info)* hsa_cache_get_info_fn;
404
- decltype(hsa_agent_iterate_caches)* hsa_agent_iterate_caches_fn;
405
- decltype(hsa_signal_silent_store_relaxed)* hsa_signal_silent_store_relaxed_fn;
406
- decltype(hsa_signal_silent_store_screlease)* hsa_signal_silent_store_screlease_fn;
407
- decltype(hsa_signal_group_create)* hsa_signal_group_create_fn;
408
- decltype(hsa_signal_group_destroy)* hsa_signal_group_destroy_fn;
409
- decltype(hsa_signal_group_wait_any_scacquire)* hsa_signal_group_wait_any_scacquire_fn;
410
- decltype(hsa_signal_group_wait_any_relaxed)* hsa_signal_group_wait_any_relaxed_fn;
411
-
412
- //===--- Instruction Set Architecture - HSA v1.1 additions --------------===//
413
-
414
- decltype(hsa_agent_iterate_isas)* hsa_agent_iterate_isas_fn;
415
- decltype(hsa_isa_get_info_alt)* hsa_isa_get_info_alt_fn;
416
- decltype(hsa_isa_get_exception_policies)* hsa_isa_get_exception_policies_fn;
417
- decltype(hsa_isa_get_round_method)* hsa_isa_get_round_method_fn;
418
- decltype(hsa_wavefront_get_info)* hsa_wavefront_get_info_fn;
419
- decltype(hsa_isa_iterate_wavefronts)* hsa_isa_iterate_wavefronts_fn;
420
-
421
- //===--- Code Objects (deprecated) - HSA v1.1 additions -----------------===//
422
-
423
- // Deprecated since v1.1.
424
- decltype(hsa_code_object_get_symbol_from_name)*
425
- hsa_code_object_get_symbol_from_name_fn;
426
-
427
- //===--- Executable - HSA v1.1 additions --------------------------------===//
428
-
429
- decltype(hsa_code_object_reader_create_from_file)*
430
- hsa_code_object_reader_create_from_file_fn;
431
- decltype(hsa_code_object_reader_create_from_memory)*
432
- hsa_code_object_reader_create_from_memory_fn;
433
- decltype(hsa_code_object_reader_destroy)* hsa_code_object_reader_destroy_fn;
434
- decltype(hsa_executable_create_alt)* hsa_executable_create_alt_fn;
435
- decltype(hsa_executable_load_program_code_object)*
436
- hsa_executable_load_program_code_object_fn;
437
- decltype(hsa_executable_load_agent_code_object)*
438
- hsa_executable_load_agent_code_object_fn;
439
- decltype(hsa_executable_validate_alt)* hsa_executable_validate_alt_fn;
440
- decltype(hsa_executable_get_symbol_by_name)*
441
- hsa_executable_get_symbol_by_name_fn;
442
- decltype(hsa_executable_iterate_agent_symbols)*
443
- hsa_executable_iterate_agent_symbols_fn;
444
- decltype(hsa_executable_iterate_program_symbols)*
445
- hsa_executable_iterate_program_symbols_fn;
446
- };
447
-
448
- // Table to export HSA Apis from Core Runtime, Amd Extensions
449
- // Finalizer and Images
450
- struct HsaApiTable {
451
-
452
- // Version of Hsa Api Table
453
- ApiTableVersion version;
454
-
455
- // Table of function pointers to HSA Core Runtime
456
- CoreApiTable* core_;
457
-
458
- // Table of function pointers to AMD extensions
459
- AmdExtTable* amd_ext_;
460
-
461
- // Table of function pointers to HSA Finalizer Extension
462
- FinalizerExtTable* finalizer_ext_;
463
-
464
- // Table of function pointers to HSA Image Extension
465
- ImageExtTable* image_ext_;
466
-
467
- // Table of function pointers for tools to use
468
- ToolsApiTable* tools_;
469
-
470
- // Table of function pointers to AMD PC Sampling Extension
471
- PcSamplingExtTable* pc_sampling_ext_;
472
- };
473
-
474
- // Structure containing instances of different api tables
475
- struct HsaApiTableContainer {
476
- HsaApiTable root;
477
- CoreApiTable core;
478
- AmdExtTable amd_ext;
479
- FinalizerExtTable finalizer_ext;
480
- ImageExtTable image_ext;
481
- ToolsApiTable tools;
482
- PcSamplingExtTable pc_sampling_ext;
483
-
484
- // Default initialization of a container instance
485
- HsaApiTableContainer() {
486
- root.version.major_id = HSA_API_TABLE_MAJOR_VERSION;
487
- root.version.minor_id = sizeof(HsaApiTable);
488
- root.version.step_id = HSA_API_TABLE_STEP_VERSION;
489
-
490
- core.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION;
491
- core.version.minor_id = sizeof(CoreApiTable);
492
- core.version.step_id = HSA_CORE_API_TABLE_STEP_VERSION;
493
- root.core_ = &core;
494
-
495
- amd_ext.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION;
496
- amd_ext.version.minor_id = sizeof(AmdExtTable);
497
- amd_ext.version.step_id = HSA_AMD_EXT_API_TABLE_STEP_VERSION;
498
- root.amd_ext_ = &amd_ext;
499
-
500
- finalizer_ext.version.major_id = HSA_FINALIZER_API_TABLE_MAJOR_VERSION;
501
- finalizer_ext.version.minor_id = sizeof(FinalizerExtTable);
502
- finalizer_ext.version.step_id = HSA_FINALIZER_API_TABLE_STEP_VERSION;
503
- root.finalizer_ext_ = &finalizer_ext;
504
-
505
- image_ext.version.major_id = HSA_IMAGE_API_TABLE_MAJOR_VERSION;
506
- image_ext.version.minor_id = sizeof(ImageExtTable);
507
- image_ext.version.step_id = HSA_IMAGE_API_TABLE_STEP_VERSION;
508
- root.image_ext_ = &image_ext;
509
-
510
- tools.version.major_id = HSA_TOOLS_API_TABLE_MAJOR_VERSION;
511
- tools.version.minor_id = sizeof(ToolsApiTable);
512
- tools.version.step_id = HSA_TOOLS_API_TABLE_STEP_VERSION;
513
- root.tools_ = &tools;
514
-
515
- pc_sampling_ext.version.major_id = HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION;
516
- pc_sampling_ext.version.minor_id = sizeof(PcSamplingExtTable);
517
- pc_sampling_ext.version.step_id = HSA_PC_SAMPLING_API_TABLE_STEP_VERSION;
518
- root.pc_sampling_ext_ = &pc_sampling_ext;
519
- }
520
- };
521
-
522
- // Api to copy function pointers of a table
523
- static
524
- void inline copyApi(void* src, void* dest, size_t size) {
525
- assert(size >= sizeof(ApiTableVersion));
526
- memcpy((char*)src + sizeof(ApiTableVersion),
527
- (char*)dest + sizeof(ApiTableVersion),
528
- (size - sizeof(ApiTableVersion)));
529
- }
530
-
531
- // Copy Api child tables if valid.
532
- static void inline copyElement(ApiTableVersion* dest, ApiTableVersion* src) {
533
- if (src->major_id && (dest->major_id == src->major_id)) {
534
- dest->step_id = src->step_id;
535
- dest->minor_id = Min(dest->minor_id, src->minor_id);
536
- copyApi(dest, src, dest->minor_id);
537
- } else {
538
- dest->major_id = 0;
539
- dest->minor_id = 0;
540
- dest->step_id = 0;
541
- }
542
- }
543
-
544
- // Copy constructor for all Api tables. The function assumes the
545
- // user has initialized an instance of tables container correctly
546
- // for the Major, Minor and Stepping Ids of Root and Child Api tables.
547
- // The function will overwrite the value of Minor Id by taking the
548
- // minimum of source and destination parameters. It will also overwrite
549
- // the stepping Id with value from source parameter.
550
- static void inline copyTables(const HsaApiTable* src, HsaApiTable* dest) {
551
- // Verify Major Id of source and destination tables match
552
- if (dest->version.major_id != src->version.major_id) {
553
- dest->version.major_id = 0;
554
- dest->version.minor_id = 0;
555
- dest->version.step_id = 0;
556
- return;
557
- }
558
-
559
- // Initialize the stepping id and minor id of root table. For the
560
- // minor id which encodes struct size, take the minimum of source
561
- // and destination parameters
562
- dest->version.step_id = src->version.step_id;
563
- dest->version.minor_id = Min(dest->version.minor_id, src->version.minor_id);
564
-
565
- // Copy child tables if present
566
- if ((offsetof(HsaApiTable, core_) < dest->version.minor_id))
567
- copyElement(&dest->core_->version, &src->core_->version);
568
- if ((offsetof(HsaApiTable, amd_ext_) < dest->version.minor_id))
569
- copyElement(&dest->amd_ext_->version, &src->amd_ext_->version);
570
- if ((offsetof(HsaApiTable, finalizer_ext_) < dest->version.minor_id))
571
- copyElement(&dest->finalizer_ext_->version, &src->finalizer_ext_->version);
572
- if ((offsetof(HsaApiTable, image_ext_) < dest->version.minor_id))
573
- copyElement(&dest->image_ext_->version, &src->image_ext_->version);
574
- if ((offsetof(HsaApiTable, tools_) < dest->version.minor_id))
575
- copyElement(&dest->tools_->version, &src->tools_->version);
576
- if ((offsetof(HsaApiTable, pc_sampling_ext_) < dest->version.minor_id))
577
- copyElement(&dest->pc_sampling_ext_->version, &src->pc_sampling_ext_->version);
578
- }
579
- #endif