triton-windows 3.3.0.post19__cp312-cp312-win_amd64.whl → 3.4.0.post20__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (173)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +4 -1
  3. triton/_filecheck.py +87 -0
  4. triton/_internal_testing.py +26 -15
  5. triton/_utils.py +110 -21
  6. triton/backends/__init__.py +20 -23
  7. triton/backends/amd/__init__.py +0 -0
  8. triton/backends/amd/compiler.py +112 -78
  9. triton/backends/amd/driver.c +5 -2
  10. triton/backends/amd/driver.py +149 -47
  11. triton/backends/compiler.py +7 -21
  12. triton/backends/nvidia/bin/ptxas.exe +0 -0
  13. triton/backends/nvidia/compiler.py +92 -93
  14. triton/backends/nvidia/driver.c +90 -98
  15. triton/backends/nvidia/driver.py +303 -128
  16. triton/compiler/code_generator.py +212 -111
  17. triton/compiler/compiler.py +110 -25
  18. triton/experimental/__init__.py +0 -0
  19. triton/experimental/gluon/__init__.py +4 -0
  20. triton/experimental/gluon/_compiler.py +0 -0
  21. triton/experimental/gluon/_runtime.py +99 -0
  22. triton/experimental/gluon/language/__init__.py +18 -0
  23. triton/experimental/gluon/language/_core.py +312 -0
  24. triton/experimental/gluon/language/_layouts.py +230 -0
  25. triton/experimental/gluon/language/_math.py +12 -0
  26. triton/experimental/gluon/language/_semantic.py +287 -0
  27. triton/experimental/gluon/language/_standard.py +47 -0
  28. triton/experimental/gluon/language/nvidia/__init__.py +4 -0
  29. triton/experimental/gluon/language/nvidia/blackwell/__init__.py +202 -0
  30. triton/experimental/gluon/language/nvidia/blackwell/tma.py +32 -0
  31. triton/experimental/gluon/language/nvidia/hopper/__init__.py +11 -0
  32. triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +51 -0
  33. triton/experimental/gluon/language/nvidia/hopper/tma.py +96 -0
  34. triton/experimental/gluon/nvidia/__init__.py +4 -0
  35. triton/experimental/gluon/nvidia/blackwell.py +3 -0
  36. triton/experimental/gluon/nvidia/hopper.py +40 -0
  37. triton/knobs.py +481 -0
  38. triton/language/__init__.py +39 -14
  39. triton/language/core.py +794 -537
  40. triton/language/extra/cuda/__init__.py +10 -7
  41. triton/language/extra/cuda/gdc.py +42 -0
  42. triton/language/extra/cuda/libdevice.py +394 -394
  43. triton/language/extra/cuda/utils.py +21 -21
  44. triton/language/extra/hip/libdevice.py +113 -104
  45. triton/language/math.py +65 -66
  46. triton/language/random.py +12 -2
  47. triton/language/semantic.py +1706 -1770
  48. triton/language/standard.py +116 -51
  49. triton/runtime/autotuner.py +117 -59
  50. triton/runtime/build.py +76 -12
  51. triton/runtime/cache.py +18 -47
  52. triton/runtime/driver.py +32 -29
  53. triton/runtime/interpreter.py +72 -35
  54. triton/runtime/jit.py +146 -110
  55. triton/runtime/tcc/lib/python310.def +1610 -0
  56. triton/runtime/tcc/lib/python311.def +1633 -0
  57. triton/runtime/tcc/lib/python312.def +1703 -0
  58. triton/runtime/tcc/lib/python313.def +1651 -0
  59. triton/runtime/tcc/lib/python313t.def +1656 -0
  60. triton/runtime/tcc/lib/python39.def +1644 -0
  61. triton/runtime/tcc/lib/python3t.def +905 -0
  62. triton/testing.py +16 -12
  63. triton/tools/disasm.py +3 -4
  64. triton/tools/tensor_descriptor.py +36 -0
  65. triton/windows_utils.py +14 -6
  66. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/METADATA +7 -2
  67. triton_windows-3.4.0.post20.dist-info/RECORD +186 -0
  68. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/WHEEL +1 -1
  69. triton_windows-3.4.0.post20.dist-info/entry_points.txt +3 -0
  70. triton_windows-3.4.0.post20.dist-info/licenses/LICENSE +23 -0
  71. triton_windows-3.4.0.post20.dist-info/top_level.txt +1 -0
  72. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  73. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
  74. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
  75. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
  76. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  77. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  78. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  79. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
  80. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  81. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
  82. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  83. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  84. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  85. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  86. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  87. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  88. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  89. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  90. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
  91. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
  92. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  93. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  94. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  95. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  96. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  97. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  98. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  99. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
  100. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  101. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  102. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  103. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  104. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  105. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
  106. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
  107. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
  108. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  109. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  110. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  111. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  112. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  113. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  114. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  115. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  116. triton/backends/amd/include/hip/device_functions.h +0 -38
  117. triton/backends/amd/include/hip/driver_types.h +0 -468
  118. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  119. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  120. triton/backends/amd/include/hip/hip_common.h +0 -100
  121. triton/backends/amd/include/hip/hip_complex.h +0 -38
  122. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  123. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  124. triton/backends/amd/include/hip/hip_ext.h +0 -161
  125. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  126. triton/backends/amd/include/hip/hip_fp8.h +0 -33
  127. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  128. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  129. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  130. triton/backends/amd/include/hip/hip_profile.h +0 -27
  131. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  132. triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
  133. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  134. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  135. triton/backends/amd/include/hip/hip_version.h +0 -17
  136. triton/backends/amd/include/hip/hiprtc.h +0 -421
  137. triton/backends/amd/include/hip/library_types.h +0 -78
  138. triton/backends/amd/include/hip/math_functions.h +0 -42
  139. triton/backends/amd/include/hip/surface_types.h +0 -63
  140. triton/backends/amd/include/hip/texture_types.h +0 -194
  141. triton/backends/amd/include/hsa/Brig.h +0 -1131
  142. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  143. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
  144. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  145. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  146. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  147. triton/backends/amd/include/hsa/hsa.h +0 -5738
  148. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  149. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
  150. triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
  151. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
  152. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  153. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  154. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  155. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  156. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
  157. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  158. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
  159. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
  160. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
  161. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  162. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  163. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  164. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  165. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  166. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  167. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  168. triton/backends/amd/include/roctracer/roctx.h +0 -229
  169. triton/language/_utils.py +0 -21
  170. triton/language/extra/cuda/_experimental_tma.py +0 -106
  171. triton/tools/experimental_descriptor.py +0 -32
  172. triton_windows-3.3.0.post19.dist-info/RECORD +0 -253
  173. triton_windows-3.3.0.post19.dist-info/top_level.txt +0 -14
@@ -1,91 +0,0 @@
1
- ////////////////////////////////////////////////////////////////////////////////
2
- //
3
- // The University of Illinois/NCSA
4
- // Open Source License (NCSA)
5
- //
6
- // Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
7
- //
8
- // Developed by:
9
- //
10
- // AMD Research and AMD HSA Software Development
11
- //
12
- // Advanced Micro Devices, Inc.
13
- //
14
- // www.amd.com
15
- //
16
- // Permission is hereby granted, free of charge, to any person obtaining a copy
17
- // of this software and associated documentation files (the "Software"), to
18
- // deal with the Software without restriction, including without limitation
19
- // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20
- // and/or sell copies of the Software, and to permit persons to whom the
21
- // Software is furnished to do so, subject to the following conditions:
22
- //
23
- // - Redistributions of source code must retain the above copyright notice,
24
- // this list of conditions and the following disclaimers.
25
- // - Redistributions in binary form must reproduce the above copyright
26
- // notice, this list of conditions and the following disclaimers in
27
- // the documentation and/or other materials provided with the distribution.
28
- // - Neither the names of Advanced Micro Devices, Inc,
29
- // nor the names of its contributors may be used to endorse or promote
30
- // products derived from this Software without specific prior written
31
- // permission.
32
- //
33
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36
- // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37
- // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38
- // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39
- // DEALINGS WITH THE SOFTWARE.
40
- //
41
- ////////////////////////////////////////////////////////////////////////////////
42
-
43
- // The following set of header files provides definitions for AMD GPU
44
- // Architecture:
45
- // - amd_hsa_common.h
46
- // - amd_hsa_elf.h
47
- // - amd_hsa_kernel_code.h
48
- // - amd_hsa_queue.h
49
- // - amd_hsa_signal.h
50
- //
51
- // Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
52
- // information.
53
-
54
- #ifndef AMD_HSA_COMMON_H
55
- #define AMD_HSA_COMMON_H
56
-
57
- #include <stddef.h>
58
- #include <stdint.h>
59
-
60
- // Descriptive version of the HSA Application Binary Interface.
61
- #define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"
62
-
63
- // Alignment attribute that specifies a minimum alignment (in bytes) for
64
- // variables of the specified type.
65
- #if defined(__GNUC__)
66
- # define __ALIGNED__(x) __attribute__((aligned(x)))
67
- #elif defined(_MSC_VER)
68
- # define __ALIGNED__(x) __declspec(align(x))
69
- #elif defined(RC_INVOKED)
70
- # define __ALIGNED__(x)
71
- #else
72
- # error
73
- #endif
74
-
75
- // Creates enumeration entries for packed types. Enumeration entries include
76
- // bit shift amount, bit width, and bit mask.
77
- #define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width) \
78
- name##_SHIFT = (shift), \
79
- name##_WIDTH = (width), \
80
- name = (((1 << (width)) - 1) << (shift)) \
81
-
82
- // Gets bits for specified mask from specified src packed instance.
83
- #define AMD_HSA_BITS_GET(src, mask) \
84
- ((src & mask) >> mask ## _SHIFT) \
85
-
86
- // Sets val bits for specified mask in specified dst packed instance.
87
- #define AMD_HSA_BITS_SET(dst, mask, val) \
88
- dst &= (~(1 << mask##_SHIFT) & ~mask); \
89
- dst |= (((val) << mask##_SHIFT) & mask) \
90
-
91
- #endif // AMD_HSA_COMMON_H
@@ -1,462 +0,0 @@
1
- ////////////////////////////////////////////////////////////////////////////////
2
- //
3
- // The University of Illinois/NCSA
4
- // Open Source License (NCSA)
5
- //
6
- // Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
7
- //
8
- // Developed by:
9
- //
10
- // AMD Research and AMD HSA Software Development
11
- //
12
- // Advanced Micro Devices, Inc.
13
- //
14
- // www.amd.com
15
- //
16
- // Permission is hereby granted, free of charge, to any person obtaining a copy
17
- // of this software and associated documentation files (the "Software"), to
18
- // deal with the Software without restriction, including without limitation
19
- // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20
- // and/or sell copies of the Software, and to permit persons to whom the
21
- // Software is furnished to do so, subject to the following conditions:
22
- //
23
- // - Redistributions of source code must retain the above copyright notice,
24
- // this list of conditions and the following disclaimers.
25
- // - Redistributions in binary form must reproduce the above copyright
26
- // notice, this list of conditions and the following disclaimers in
27
- // the documentation and/or other materials provided with the distribution.
28
- // - Neither the names of Advanced Micro Devices, Inc,
29
- // nor the names of its contributors may be used to endorse or promote
30
- // products derived from this Software without specific prior written
31
- // permission.
32
- //
33
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36
- // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37
- // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38
- // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39
- // DEALINGS WITH THE SOFTWARE.
40
- //
41
- ////////////////////////////////////////////////////////////////////////////////
42
-
43
- // Undefine the macro in case it is defined in the system elf.h.
44
- #undef EM_AMDGPU
45
-
46
- #ifndef AMD_HSA_ELF_H
47
- #define AMD_HSA_ELF_H
48
-
49
- // AMD GPU Specific ELF Header Enumeration Values.
50
- //
51
- // Values are copied from LLVM BinaryFormat/ELF.h . This file also contains
52
- // code object V1 defintions which are not part of the LLVM header. Code object
53
- // V1 was only supported by the Finalizer which is now deprecated and removed.
54
- //
55
- // TODO: Deprecate and remove V1 support and replace this header with using the
56
- // LLVM header.
57
- namespace ELF {
58
-
59
- // Machine architectures
60
- // See current registered ELF machine architectures at:
61
- // http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
62
- enum {
63
- EM_AMDGPU = 224, // AMD GPU architecture
64
- };
65
-
66
- // OS ABI identification.
67
- enum {
68
- ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
69
- };
70
-
71
- // AMDGPU OS ABI Version identification.
72
- enum {
73
- // ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
74
- // was never defined for V1.
75
- ELFABIVERSION_AMDGPU_HSA_V2 = 0,
76
- ELFABIVERSION_AMDGPU_HSA_V3 = 1,
77
- ELFABIVERSION_AMDGPU_HSA_V4 = 2,
78
- ELFABIVERSION_AMDGPU_HSA_V5 = 3,
79
- ELFABIVERSION_AMDGPU_HSA_V6 = 4,
80
- };
81
-
82
- // AMDGPU specific e_flags.
83
- enum : unsigned {
84
- // Processor selection mask for EF_AMDGPU_MACH_* values.
85
- EF_AMDGPU_MACH = 0x0ff,
86
-
87
- // Not specified processor.
88
- EF_AMDGPU_MACH_NONE = 0x000,
89
-
90
- // AMDGCN-based processors.
91
- // clang-format off
92
- EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
93
- EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
94
- EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
95
- EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
96
- EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
97
- EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
98
- EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
99
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27 = 0x027,
100
- EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
101
- EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
102
- EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
103
- EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
104
- EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
105
- EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
106
- EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
107
- EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
108
- EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
109
- EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
110
- EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032,
111
- EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
112
- EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
113
- EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
114
- EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
115
- EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037,
116
- EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038,
117
- EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039,
118
- EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a,
119
- EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b,
120
- EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c,
121
- EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
122
- EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
123
- EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
124
- EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
125
- EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
126
- EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
127
- EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
128
- EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044,
129
- EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
130
- EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
131
- EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
132
- EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
133
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
134
- EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
135
- EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
136
- EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
137
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
138
- EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
139
- EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
140
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
141
- EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
142
- EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
143
- EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
144
- EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
145
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X55 = 0x055,
146
- // clang-format on
147
-
148
- // First/last AMDGCN-based processors.
149
- EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
150
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC,
151
-
152
- // Indicates if the "xnack" target feature is enabled for all code contained
153
- // in the object.
154
- //
155
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
156
- EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
157
- // Indicates if the trap handler is enabled for all code contained
158
- // in the object.
159
- //
160
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
161
- EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
162
-
163
- // Indicates if the "xnack" target feature is enabled for all code contained
164
- // in the object.
165
- //
166
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
167
- EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
168
- // Indicates if the "sramecc" target feature is enabled for all code
169
- // contained in the object.
170
- //
171
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
172
- EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
173
-
174
- // XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
175
- //
176
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
177
- EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
178
- // XNACK is not supported.
179
- EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
180
- // XNACK is any/default/unspecified.
181
- EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
182
- // XNACK is off.
183
- EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
184
- // XNACK is on.
185
- EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
186
-
187
- // SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
188
- //
189
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
190
- EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
191
- // SRAMECC is not supported.
192
- EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
193
- // SRAMECC is any/default/unspecified.
194
- EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
195
- // SRAMECC is off.
196
- EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
197
- // SRAMECC is on.
198
- EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
199
-
200
- // Generic target versioning. This is contained in the list byte of EFLAGS.
201
- EF_AMDGPU_GENERIC_VERSION = 0xff000000,
202
- EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
203
- EF_AMDGPU_GENERIC_VERSION_MIN = 1,
204
- EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
205
- };
206
-
207
- // ELF Relocation types for AMDGPU.
208
- enum : unsigned {
209
- R_AMDGPU_ABS32_LO = 1,
210
- R_AMDGPU_ABS32_HI = 2,
211
- R_AMDGPU_ABS64 = 3,
212
- R_AMDGPU_ABS32 = 6,
213
- R_AMDGPU_RELATIVE64 = 13,
214
- };
215
-
216
- } // end namespace ELF
217
-
218
- // ELF Section Header Flag Enumeration Values.
219
- #define SHF_AMDGPU_HSA_GLOBAL (0x00100000 & SHF_MASKOS)
220
- #define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS)
221
- #define SHF_AMDGPU_HSA_CODE (0x00400000 & SHF_MASKOS)
222
- #define SHF_AMDGPU_HSA_AGENT (0x00800000 & SHF_MASKOS)
223
-
224
- //
225
- typedef enum {
226
- AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
227
- AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1,
228
- AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
229
- AMDGPU_HSA_SEGMENT_CODE_AGENT = 3,
230
- AMDGPU_HSA_SEGMENT_LAST,
231
- } amdgpu_hsa_elf_segment_t;
232
-
233
- // ELF Program Header Type Enumeration Values.
234
- #define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
235
- #define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
236
- #define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
237
- #define PT_AMDGPU_HSA_LOAD_CODE_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)
238
-
239
- // ELF Symbol Type Enumeration Values.
240
- #define STT_AMDGPU_HSA_KERNEL (STT_LOOS + 0)
241
- #define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1)
242
- #define STT_AMDGPU_HSA_METADATA (STT_LOOS + 2)
243
-
244
- // ELF Symbol Binding Enumeration Values.
245
- #define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0)
246
-
247
- // ELF Symbol Other Information Creation/Retrieval.
248
- #define ELF64_ST_AMDGPU_ALLOCATION(o) (((o) >> 2) & 0x3)
249
- #define ELF64_ST_AMDGPU_FLAGS(o) ((o) >> 4)
250
- #define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3))
251
-
252
- typedef enum {
253
- AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0,
254
- AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
255
- AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2,
256
- AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
257
- AMDGPU_HSA_SYMBOL_ALLOCATION_LAST,
258
- } amdgpu_hsa_symbol_allocation_t;
259
-
260
- // ELF Symbol Allocation Enumeration Values.
261
- #define STA_AMDGPU_HSA_DEFAULT AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
262
- #define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
263
- #define STA_AMDGPU_HSA_GLOBAL_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
264
- #define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT
265
-
266
- typedef enum {
267
- AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
268
- AMDGPU_HSA_SYMBOL_FLAG_CONST = 1,
269
- AMDGPU_HSA_SYMBOL_FLAG_LAST,
270
- } amdgpu_hsa_symbol_flag_t;
271
-
272
- // ELF Symbol Flag Enumeration Values.
273
- #define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST
274
-
275
- // Legacy/V1 AMD GPU Relocation Type Enumeration Values.
276
- #define R_AMDGPU_V1_NONE 0
277
- #define R_AMDGPU_V1_32_LOW 1
278
- #define R_AMDGPU_V1_32_HIGH 2
279
- #define R_AMDGPU_V1_64 3
280
- #define R_AMDGPU_V1_INIT_SAMPLER 4
281
- #define R_AMDGPU_V1_INIT_IMAGE 5
282
- #define R_AMDGPU_V1_RELATIVE64 13
283
-
284
- // AMD GPU Note Type Enumeration Values.
285
- #define NT_AMD_HSA_CODE_OBJECT_VERSION 1
286
- #define NT_AMD_HSA_HSAIL 2
287
- #define NT_AMD_HSA_ISA_VERSION 3
288
- #define NT_AMD_HSA_PRODUCER 4
289
- #define NT_AMD_HSA_PRODUCER_OPTIONS 5
290
- #define NT_AMD_HSA_EXTENSION 6
291
- #define NT_AMD_HSA_ISA_NAME 11
292
- /* AMDGPU snapshots of runtime, agent and queues state for use in core dump */
293
- #define NT_AMDGPU_CORE_STATE 33
294
- #define NT_AMD_HSA_HLDEBUG_DEBUG 101
295
- #define NT_AMD_HSA_HLDEBUG_TARGET 102
296
-
297
- // AMD GPU Metadata Kind Enumeration Values.
298
- typedef uint16_t amdgpu_hsa_metadata_kind16_t;
299
- typedef enum {
300
- AMDGPU_HSA_METADATA_KIND_NONE = 0,
301
- AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1,
302
- AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
303
- AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
304
- AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
305
- } amdgpu_hsa_metadata_kind_t;
306
-
307
- // AMD GPU Sampler Coordinate Normalization Enumeration Values.
308
- typedef uint8_t amdgpu_hsa_sampler_coord8_t;
309
- typedef enum {
310
- AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
311
- AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1
312
- } amdgpu_hsa_sampler_coord_t;
313
-
314
- // AMD GPU Sampler Filter Enumeration Values.
315
- typedef uint8_t amdgpu_hsa_sampler_filter8_t;
316
- typedef enum {
317
- AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
318
- AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1
319
- } amdgpu_hsa_sampler_filter_t;
320
-
321
- // AMD GPU Sampler Addressing Enumeration Values.
322
- typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
323
- typedef enum {
324
- AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0,
325
- AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1,
326
- AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
327
- AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3,
328
- AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
329
- } amdgpu_hsa_sampler_addressing_t;
330
-
331
- // AMD GPU Sampler Descriptor.
332
- typedef struct amdgpu_hsa_sampler_descriptor_s {
333
- uint16_t size;
334
- amdgpu_hsa_metadata_kind16_t kind;
335
- amdgpu_hsa_sampler_coord8_t coord;
336
- amdgpu_hsa_sampler_filter8_t filter;
337
- amdgpu_hsa_sampler_addressing8_t addressing;
338
- uint8_t reserved1;
339
- } amdgpu_hsa_sampler_descriptor_t;
340
-
341
- // AMD GPU Image Geometry Enumeration Values.
342
- typedef uint8_t amdgpu_hsa_image_geometry8_t;
343
- typedef enum {
344
- AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0,
345
- AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1,
346
- AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2,
347
- AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3,
348
- AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4,
349
- AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5,
350
- AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6,
351
- AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
352
- } amdgpu_hsa_image_geometry_t;
353
-
354
- // AMD GPU Image Channel Order Enumeration Values.
355
- typedef uint8_t amdgpu_hsa_image_channel_order8_t;
356
- typedef enum {
357
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0,
358
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1,
359
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2,
360
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3,
361
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4,
362
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5,
363
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6,
364
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7,
365
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8,
366
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9,
367
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10,
368
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11,
369
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12,
370
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13,
371
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14,
372
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15,
373
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
374
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
375
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18,
376
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
377
- } amdgpu_hsa_image_channel_order_t;
378
-
379
- // AMD GPU Image Channel Type Enumeration Values.
380
- typedef uint8_t amdgpu_hsa_image_channel_type8_t;
381
- typedef enum {
382
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
383
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
384
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
385
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
386
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
387
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5,
388
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6,
389
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7,
390
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
391
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
392
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
393
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
394
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
395
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
396
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
397
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15
398
- } amdgpu_hsa_image_channel_type_t;
399
-
400
- // AMD GPU Image Descriptor.
401
- typedef struct amdgpu_hsa_image_descriptor_s {
402
- uint16_t size;
403
- amdgpu_hsa_metadata_kind16_t kind;
404
- amdgpu_hsa_image_geometry8_t geometry;
405
- amdgpu_hsa_image_channel_order8_t channel_order;
406
- amdgpu_hsa_image_channel_type8_t channel_type;
407
- uint8_t reserved1;
408
- uint64_t width;
409
- uint64_t height;
410
- uint64_t depth;
411
- uint64_t array;
412
- } amdgpu_hsa_image_descriptor_t;
413
-
414
- typedef struct amdgpu_hsa_note_code_object_version_s {
415
- uint32_t major_version;
416
- uint32_t minor_version;
417
- } amdgpu_hsa_note_code_object_version_t;
418
-
419
- typedef struct amdgpu_hsa_note_hsail_s {
420
- uint32_t hsail_major_version;
421
- uint32_t hsail_minor_version;
422
- uint8_t profile;
423
- uint8_t machine_model;
424
- uint8_t default_float_round;
425
- } amdgpu_hsa_note_hsail_t;
426
-
427
- typedef struct amdgpu_hsa_note_isa_s {
428
- uint16_t vendor_name_size;
429
- uint16_t architecture_name_size;
430
- uint32_t major;
431
- uint32_t minor;
432
- uint32_t stepping;
433
- char vendor_and_architecture_name[1];
434
- } amdgpu_hsa_note_isa_t;
435
-
436
- typedef struct amdgpu_hsa_note_producer_s {
437
- uint16_t producer_name_size;
438
- uint16_t reserved;
439
- uint32_t producer_major_version;
440
- uint32_t producer_minor_version;
441
- char producer_name[1];
442
- } amdgpu_hsa_note_producer_t;
443
-
444
- typedef struct amdgpu_hsa_note_producer_options_s {
445
- uint16_t producer_options_size;
446
- char producer_options[1];
447
- } amdgpu_hsa_note_producer_options_t;
448
-
449
- typedef enum {
450
- AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
451
- AMDGPU_HSA_RODATA_GLOBAL_AGENT,
452
- AMDGPU_HSA_RODATA_READONLY_AGENT,
453
- AMDGPU_HSA_DATA_GLOBAL_PROGRAM,
454
- AMDGPU_HSA_DATA_GLOBAL_AGENT,
455
- AMDGPU_HSA_DATA_READONLY_AGENT,
456
- AMDGPU_HSA_BSS_GLOBAL_PROGRAM,
457
- AMDGPU_HSA_BSS_GLOBAL_AGENT,
458
- AMDGPU_HSA_BSS_READONLY_AGENT,
459
- AMDGPU_HSA_SECTION_LAST,
460
- } amdgpu_hsa_elf_section_t;
461
-
462
- #endif // AMD_HSA_ELF_H