triton-windows 3.3.1.post19__cp311-cp311-win_amd64.whl → 3.5.0.post21__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (225) hide show
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +11 -2
  3. triton/_filecheck.py +97 -0
  4. triton/_internal_testing.py +95 -18
  5. triton/_utils.py +112 -21
  6. triton/backends/__init__.py +20 -23
  7. triton/backends/amd/__init__.py +0 -0
  8. triton/backends/amd/compiler.py +161 -119
  9. triton/backends/amd/driver.c +118 -46
  10. triton/backends/amd/driver.py +274 -96
  11. triton/backends/compiler.py +7 -21
  12. triton/backends/driver.py +13 -0
  13. triton/backends/nvidia/bin/ptxas.exe +0 -0
  14. triton/backends/nvidia/compiler.py +163 -106
  15. triton/backends/nvidia/driver.c +166 -101
  16. triton/backends/nvidia/driver.py +384 -202
  17. triton/compiler/__init__.py +5 -2
  18. triton/compiler/code_generator.py +439 -231
  19. triton/compiler/compiler.py +152 -84
  20. triton/experimental/__init__.py +0 -0
  21. triton/experimental/gluon/__init__.py +5 -0
  22. triton/experimental/gluon/_compiler.py +0 -0
  23. triton/experimental/gluon/_runtime.py +102 -0
  24. triton/experimental/gluon/language/__init__.py +119 -0
  25. triton/experimental/gluon/language/_core.py +490 -0
  26. triton/experimental/gluon/language/_layouts.py +583 -0
  27. triton/experimental/gluon/language/_math.py +20 -0
  28. triton/experimental/gluon/language/_semantic.py +380 -0
  29. triton/experimental/gluon/language/_standard.py +80 -0
  30. triton/experimental/gluon/language/amd/__init__.py +4 -0
  31. triton/experimental/gluon/language/amd/_layouts.py +96 -0
  32. triton/experimental/gluon/language/amd/cdna3/__init__.py +100 -0
  33. triton/experimental/gluon/language/amd/cdna4/__init__.py +48 -0
  34. triton/experimental/gluon/language/amd/cdna4/async_copy.py +151 -0
  35. triton/experimental/gluon/language/extra/__init__.py +3 -0
  36. triton/experimental/gluon/language/nvidia/__init__.py +4 -0
  37. triton/experimental/gluon/language/nvidia/ampere/__init__.py +3 -0
  38. triton/experimental/gluon/language/nvidia/ampere/async_copy.py +74 -0
  39. triton/experimental/gluon/language/nvidia/ampere/mbarrier.py +80 -0
  40. triton/experimental/gluon/language/nvidia/blackwell/__init__.py +387 -0
  41. triton/experimental/gluon/language/nvidia/blackwell/tma.py +52 -0
  42. triton/experimental/gluon/language/nvidia/hopper/__init__.py +132 -0
  43. triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +34 -0
  44. triton/experimental/gluon/language/nvidia/hopper/tma.py +97 -0
  45. triton/experimental/gluon/nvidia/__init__.py +4 -0
  46. triton/experimental/gluon/nvidia/blackwell.py +3 -0
  47. triton/experimental/gluon/nvidia/hopper.py +45 -0
  48. triton/knobs.py +546 -0
  49. triton/language/__init__.py +50 -19
  50. triton/language/core.py +909 -572
  51. triton/language/extra/cuda/__init__.py +10 -7
  52. triton/language/extra/cuda/gdc.py +42 -0
  53. triton/language/extra/cuda/libdevice.py +394 -394
  54. triton/language/extra/cuda/utils.py +21 -21
  55. triton/language/extra/hip/__init__.py +3 -1
  56. triton/language/extra/hip/libdevice.py +120 -104
  57. triton/language/extra/hip/utils.py +35 -0
  58. triton/language/extra/libdevice.py +4 -0
  59. triton/language/math.py +65 -66
  60. triton/language/random.py +12 -2
  61. triton/language/semantic.py +1757 -1768
  62. triton/language/standard.py +127 -62
  63. triton/language/target_info.py +54 -0
  64. triton/runtime/_allocation.py +15 -3
  65. triton/runtime/_async_compile.py +55 -0
  66. triton/runtime/autotuner.py +117 -60
  67. triton/runtime/build.py +83 -17
  68. triton/runtime/cache.py +61 -47
  69. triton/runtime/driver.py +25 -47
  70. triton/runtime/interpreter.py +95 -50
  71. triton/runtime/jit.py +445 -248
  72. triton/runtime/tcc/include/_mingw.h +8 -10
  73. triton/runtime/tcc/include/assert.h +5 -0
  74. triton/runtime/tcc/include/errno.h +1 -1
  75. triton/runtime/tcc/include/float.h +21 -3
  76. triton/runtime/tcc/include/iso646.h +36 -0
  77. triton/runtime/tcc/include/limits.h +5 -0
  78. triton/runtime/tcc/include/malloc.h +2 -2
  79. triton/runtime/tcc/include/math.h +21 -261
  80. triton/runtime/tcc/include/stdalign.h +16 -0
  81. triton/runtime/tcc/include/stdarg.h +5 -70
  82. triton/runtime/tcc/include/stdatomic.h +171 -0
  83. triton/runtime/tcc/include/stddef.h +7 -19
  84. triton/runtime/tcc/include/stdlib.h +15 -4
  85. triton/runtime/tcc/include/stdnoreturn.h +7 -0
  86. triton/runtime/tcc/include/sys/stat.h +2 -2
  87. triton/runtime/tcc/include/sys/types.h +5 -0
  88. triton/runtime/tcc/include/tcc/tcc_libm.h +444 -27
  89. triton/runtime/tcc/include/tccdefs.h +342 -0
  90. triton/runtime/tcc/include/tgmath.h +89 -0
  91. triton/runtime/tcc/include/uchar.h +33 -0
  92. triton/runtime/tcc/include/unistd.h +1 -0
  93. triton/runtime/tcc/include/winapi/qos.h +72 -0
  94. triton/runtime/tcc/include/winapi/shellapi.h +59 -0
  95. triton/runtime/tcc/include/winapi/winbase.h +9 -2
  96. triton/runtime/tcc/include/winapi/wincon.h +8 -0
  97. triton/runtime/tcc/include/winapi/windows.h +1 -1
  98. triton/runtime/tcc/include/winapi/winnls.h +778 -0
  99. triton/runtime/tcc/include/winapi/winnt.h +9 -7
  100. triton/runtime/tcc/include/winapi/winsock2.h +1474 -0
  101. triton/runtime/tcc/include/winapi/ws2ipdef.h +21 -0
  102. triton/runtime/tcc/include/winapi/ws2tcpip.h +391 -0
  103. triton/runtime/tcc/lib/libtcc1.a +0 -0
  104. triton/runtime/tcc/lib/python314.def +1800 -0
  105. triton/runtime/tcc/lib/python314t.def +1809 -0
  106. triton/runtime/tcc/libtcc.dll +0 -0
  107. triton/runtime/tcc/tcc.exe +0 -0
  108. triton/testing.py +16 -12
  109. triton/tools/compile.py +62 -14
  110. triton/tools/disasm.py +3 -4
  111. triton/tools/extra/cuda/compile.c +1 -0
  112. triton/tools/extra/hip/compile.cpp +66 -0
  113. triton/tools/extra/hip/compile.h +13 -0
  114. triton/tools/ragged_tma.py +92 -0
  115. triton/tools/tensor_descriptor.py +34 -0
  116. triton/windows_utils.py +52 -81
  117. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/METADATA +8 -4
  118. triton_windows-3.5.0.post21.dist-info/RECORD +217 -0
  119. triton_windows-3.5.0.post21.dist-info/entry_points.txt +3 -0
  120. triton_windows-3.5.0.post21.dist-info/licenses/LICENSE +23 -0
  121. triton_windows-3.5.0.post21.dist-info/top_level.txt +1 -0
  122. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  123. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
  124. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
  125. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
  126. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  127. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  128. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  129. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
  130. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  131. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
  132. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  133. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  134. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  135. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  136. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  137. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  138. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  139. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  140. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
  141. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
  142. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  143. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  144. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  145. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  146. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  147. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  148. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  149. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
  150. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  151. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  152. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  153. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  154. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  155. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
  156. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
  157. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
  158. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  159. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  160. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  161. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  162. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  163. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  164. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  165. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  166. triton/backends/amd/include/hip/device_functions.h +0 -38
  167. triton/backends/amd/include/hip/driver_types.h +0 -468
  168. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  169. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  170. triton/backends/amd/include/hip/hip_common.h +0 -100
  171. triton/backends/amd/include/hip/hip_complex.h +0 -38
  172. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  173. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  174. triton/backends/amd/include/hip/hip_ext.h +0 -161
  175. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  176. triton/backends/amd/include/hip/hip_fp8.h +0 -33
  177. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  178. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  179. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  180. triton/backends/amd/include/hip/hip_profile.h +0 -27
  181. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  182. triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
  183. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  184. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  185. triton/backends/amd/include/hip/hip_version.h +0 -17
  186. triton/backends/amd/include/hip/hiprtc.h +0 -421
  187. triton/backends/amd/include/hip/library_types.h +0 -78
  188. triton/backends/amd/include/hip/math_functions.h +0 -42
  189. triton/backends/amd/include/hip/surface_types.h +0 -63
  190. triton/backends/amd/include/hip/texture_types.h +0 -194
  191. triton/backends/amd/include/hsa/Brig.h +0 -1131
  192. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  193. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
  194. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  195. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  196. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  197. triton/backends/amd/include/hsa/hsa.h +0 -5738
  198. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  199. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
  200. triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
  201. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
  202. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  203. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  204. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  205. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  206. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
  207. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  208. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
  209. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
  210. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
  211. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  212. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  213. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  214. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  215. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  216. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  217. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  218. triton/backends/amd/include/roctracer/roctx.h +0 -229
  219. triton/language/_utils.py +0 -21
  220. triton/language/extra/cuda/_experimental_tma.py +0 -106
  221. triton/runtime/tcc/lib/libtcc1-64.a +0 -0
  222. triton/tools/experimental_descriptor.py +0 -32
  223. triton_windows-3.3.1.post19.dist-info/RECORD +0 -260
  224. triton_windows-3.3.1.post19.dist-info/top_level.txt +0 -14
  225. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/WHEEL +0 -0
@@ -1,531 +0,0 @@
1
- ////////////////////////////////////////////////////////////////////////////////
2
- //
3
- // The University of Illinois/NCSA
4
- // Open Source License (NCSA)
5
- //
6
- // Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
7
- //
8
- // Developed by:
9
- //
10
- // AMD Research and AMD HSA Software Development
11
- //
12
- // Advanced Micro Devices, Inc.
13
- //
14
- // www.amd.com
15
- //
16
- // Permission is hereby granted, free of charge, to any person obtaining a copy
17
- // of this software and associated documentation files (the "Software"), to
18
- // deal with the Software without restriction, including without limitation
19
- // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20
- // and/or sell copies of the Software, and to permit persons to whom the
21
- // Software is furnished to do so, subject to the following conditions:
22
- //
23
- // - Redistributions of source code must retain the above copyright notice,
24
- // this list of conditions and the following disclaimers.
25
- // - Redistributions in binary form must reproduce the above copyright
26
- // notice, this list of conditions and the following disclaimers in
27
- // the documentation and/or other materials provided with the distribution.
28
- // - Neither the names of Advanced Micro Devices, Inc,
29
- // nor the names of its contributors may be used to endorse or promote
30
- // products derived from this Software without specific prior written
31
- // permission.
32
- //
33
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36
- // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37
- // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38
- // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39
- // DEALINGS WITH THE SOFTWARE.
40
- //
41
- ////////////////////////////////////////////////////////////////////////////////
42
-
43
- #ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
44
- #define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
45
-
46
- #include "hsa.h"
47
-
48
- #undef HSA_API
49
- #ifdef HSA_EXPORT_FINALIZER
50
- #define HSA_API HSA_API_EXPORT
51
- #else
52
- #define HSA_API HSA_API_IMPORT
53
- #endif
54
-
55
- #ifdef __cplusplus
56
- extern "C" {
57
- #endif // __cplusplus
58
-
59
- struct BrigModuleHeader;
60
- typedef struct BrigModuleHeader* BrigModule_t;
61
-
62
- /** \defgroup ext-alt-finalizer-extensions Finalization Extensions
63
- * @{
64
- */
65
-
66
- /**
67
- * @brief Enumeration constants added to ::hsa_status_t by this extension.
68
- */
69
- enum {
70
- /**
71
- * The HSAIL program is invalid.
72
- */
73
- HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
74
- /**
75
- * The HSAIL module is invalid.
76
- */
77
- HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
78
- /**
79
- * Machine model or profile of the HSAIL module do not match the machine model
80
- * or profile of the HSAIL program.
81
- */
82
- HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
83
- /**
84
- * The HSAIL module is already a part of the HSAIL program.
85
- */
86
- HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
87
- /**
88
- * Compatibility mismatch between symbol declaration and symbol definition.
89
- */
90
- HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
91
- /**
92
- * The finalization encountered an error while finalizing a kernel or
93
- * indirect function.
94
- */
95
- HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
96
- /**
97
- * Mismatch between a directive in the control directive structure and in
98
- * the HSAIL kernel.
99
- */
100
- HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
101
- };
102
-
103
- /** @} */
104
-
105
- /** \defgroup ext-alt-finalizer-program Finalization Program
106
- * @{
107
- */
108
-
109
- /**
110
- * @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
111
- * the definition of the BrigModule_t type.
112
- */
113
- typedef BrigModule_t hsa_ext_module_t;
114
-
115
- /**
116
- * @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
117
- * modules that collectively define functions and variables used by kernels and
118
- * indirect functions.
119
- */
120
- typedef struct hsa_ext_program_s {
121
- /**
122
- * Opaque handle.
123
- */
124
- uint64_t handle;
125
- } hsa_ext_program_t;
126
-
127
- /**
128
- * @brief Create an empty HSAIL program.
129
- *
130
- * @param[in] machine_model Machine model used in the HSAIL program.
131
- *
132
- * @param[in] profile Profile used in the HSAIL program.
133
- *
134
- * @param[in] default_float_rounding_mode Default float rounding mode used in
135
- * the HSAIL program.
136
- *
137
- * @param[in] options Vendor-specific options. May be NULL.
138
- *
139
- * @param[out] program Memory location where the HSA runtime stores the newly
140
- * created HSAIL program handle.
141
- *
142
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
143
- *
144
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
145
- * initialized.
146
- *
147
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
148
- * resources required for the operation.
149
- *
150
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
151
- * @p profile is invalid, @p default_float_rounding_mode is invalid, or
152
- * @p program is NULL.
153
- */
154
- hsa_status_t HSA_API hsa_ext_program_create(
155
- hsa_machine_model_t machine_model,
156
- hsa_profile_t profile,
157
- hsa_default_float_rounding_mode_t default_float_rounding_mode,
158
- const char *options,
159
- hsa_ext_program_t *program);
160
-
161
- /**
162
- * @brief Destroy a HSAIL program.
163
- *
164
- * @details The HSAIL program handle becomes invalid after it has been
165
- * destroyed. Code object handles produced by ::hsa_ext_program_finalize are
166
- * still valid after the HSAIL program has been destroyed, and can be used as
167
- * intended. Resources allocated outside and associated with the HSAIL program
168
- * (such as HSAIL modules that are added to the HSAIL program) can be released
169
- * after the finalization program has been destroyed.
170
- *
171
- * @param[in] program HSAIL program.
172
- *
173
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
174
- *
175
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
176
- * initialized.
177
- *
178
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
179
- * invalid.
180
- */
181
- hsa_status_t HSA_API hsa_ext_program_destroy(
182
- hsa_ext_program_t program);
183
-
184
- /**
185
- * @brief Add a HSAIL module to an existing HSAIL program.
186
- *
187
- * @details The HSA runtime does not perform a deep copy of the HSAIL module
188
- * upon addition. Instead, it stores a pointer to the HSAIL module. The
189
- * ownership of the HSAIL module belongs to the application, which must ensure
190
- * that @p module is not released before destroying the HSAIL program.
191
- *
192
- * The HSAIL module is successfully added to the HSAIL program if @p module is
193
- * valid, if all the declarations and definitions for the same symbol are
194
- * compatible, and if @p module specify machine model and profile that matches
195
- * the HSAIL program.
196
- *
197
- * @param[in] program HSAIL program.
198
- *
199
- * @param[in] module HSAIL module. The application can add the same HSAIL module
200
- * to @p program at most once. The HSAIL module must specify the same machine
201
- * model and profile as @p program. If the floating-mode rounding mode of @p
202
- * module is not default, then it should match that of @p program.
203
- *
204
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
205
- *
206
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
207
- * initialized.
208
- *
209
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
210
- * resources required for the operation.
211
- *
212
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
213
- *
214
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
215
- *
216
- * @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
217
- * module does not match machine model of @p program, or the profile of @p
218
- * module does not match profile of @p program.
219
- *
220
- * @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
221
- * already a part of the HSAIL program.
222
- *
223
- * @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
224
- * definition compatibility mismatch. See the symbol compatibility rules in the
225
- * HSA Programming Reference Manual.
226
- */
227
- hsa_status_t HSA_API hsa_ext_program_add_module(
228
- hsa_ext_program_t program,
229
- hsa_ext_module_t module);
230
-
231
- /**
232
- * @brief Iterate over the HSAIL modules in a program, and invoke an
233
- * application-defined callback on every iteration.
234
- *
235
- * @param[in] program HSAIL program.
236
- *
237
- * @param[in] callback Callback to be invoked once per HSAIL module in the
238
- * program. The HSA runtime passes three arguments to the callback: the program,
239
- * a HSAIL module, and the application data. If @p callback returns a status
240
- * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
241
- * stops and ::hsa_ext_program_iterate_modules returns that status value.
242
- *
243
- * @param[in] data Application data that is passed to @p callback on every
244
- * iteration. May be NULL.
245
- *
246
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
247
- *
248
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
249
- * initialized.
250
- *
251
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
252
- *
253
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
254
- */
255
- hsa_status_t HSA_API hsa_ext_program_iterate_modules(
256
- hsa_ext_program_t program,
257
- hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
258
- void* data),
259
- void* data);
260
-
261
- /**
262
- * @brief HSAIL program attributes.
263
- */
264
- typedef enum {
265
- /**
266
- * Machine model specified when the HSAIL program was created. The type
267
- * of this attribute is ::hsa_machine_model_t.
268
- */
269
- HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
270
- /**
271
- * Profile specified when the HSAIL program was created. The type of
272
- * this attribute is ::hsa_profile_t.
273
- */
274
- HSA_EXT_PROGRAM_INFO_PROFILE = 1,
275
- /**
276
- * Default float rounding mode specified when the HSAIL program was
277
- * created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
278
- */
279
- HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
280
- } hsa_ext_program_info_t;
281
-
282
- /**
283
- * @brief Get the current value of an attribute for a given HSAIL program.
284
- *
285
- * @param[in] program HSAIL program.
286
- *
287
- * @param[in] attribute Attribute to query.
288
- *
289
- * @param[out] value Pointer to an application-allocated buffer where to store
290
- * the value of the attribute. If the buffer passed by the application is not
291
- * large enough to hold the value of @p attribute, the behaviour is undefined.
292
- *
293
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
294
- *
295
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
296
- * initialized.
297
- *
298
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
299
- *
300
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
301
- * HSAIL program attribute, or @p value is NULL.
302
- */
303
- hsa_status_t HSA_API hsa_ext_program_get_info(
304
- hsa_ext_program_t program,
305
- hsa_ext_program_info_t attribute,
306
- void *value);
307
-
308
- /**
309
- * @brief Finalizer-determined call convention.
310
- */
311
- typedef enum {
312
- /**
313
- * Finalizer-determined call convention.
314
- */
315
- HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
316
- } hsa_ext_finalizer_call_convention_t;
317
-
318
- /**
319
- * @brief Control directives specify low-level information about the
320
- * finalization process.
321
- */
322
- typedef struct hsa_ext_control_directives_s {
323
- /**
324
- * Bitset indicating which control directives are enabled. The bit assigned to
325
- * a control directive is determined by the corresponding value in
326
- * BrigControlDirective.
327
- *
328
- * If a control directive is disabled, its corresponding field value (if any)
329
- * must be 0. Control directives that are only present or absent (such as
330
- * partial workgroups) have no corresponding field as the presence of the bit
331
- * in this mask is sufficient.
332
- */
333
- uint64_t control_directives_mask;
334
- /**
335
- * Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
336
- * assigned to an HSAIL exception is determined by the corresponding value
337
- * in BrigExceptionsMask. If the kernel contains a enablebreakexceptions
338
- * control directive, the finalizer uses the union of the two masks.
339
- */
340
- uint16_t break_exceptions_mask;
341
- /**
342
- * Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
343
- * bit assigned to an HSAIL exception is determined by the corresponding value
344
- * in BrigExceptionsMask. If the kernel contains a enabledetectexceptions
345
- * control directive, the finalizer uses the union of the two masks.
346
- */
347
- uint16_t detect_exceptions_mask;
348
- /**
349
- * Maximum size (in bytes) of dynamic group memory that will be allocated by
350
- * the application for any dispatch of the kernel. If the kernel contains a
351
- * maxdynamicsize control directive, the two values should match.
352
- */
353
- uint32_t max_dynamic_group_size;
354
- /**
355
- * Maximum number of grid work-items that will be used by the application to
356
- * launch the kernel. If the kernel contains a maxflatgridsize control
357
- * directive, the value of @a max_flat_grid_size must not be greater than the
358
- * value of the directive, and takes precedence.
359
- *
360
- * The value specified for maximum absolute grid size must be greater than or
361
- * equal to the product of the values specified by @a required_grid_size.
362
- *
363
- * If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
364
- * control_directives_mask, this field must be greater than 0.
365
- */
366
- uint64_t max_flat_grid_size;
367
- /**
368
- * Maximum number of work-group work-items that will be used by the
369
- * application to launch the kernel. If the kernel contains a
370
- * maxflatworkgroupsize control directive, the value of @a
371
- * max_flat_workgroup_size must not be greater than the value of the
372
- * directive, and takes precedence.
373
- *
374
- * The value specified for maximum absolute grid size must be greater than or
375
- * equal to the product of the values specified by @a required_workgroup_size.
376
- *
377
- * If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
378
- * control_directives_mask, this field must be greater than 0.
379
- */
380
- uint32_t max_flat_workgroup_size;
381
- /**
382
- * Reserved. Must be 0.
383
- */
384
- uint32_t reserved1;
385
- /**
386
- * Grid size that will be used by the application in any dispatch of the
387
- * kernel. If the kernel contains a requiredgridsize control directive, the
388
- * dimensions should match.
389
- *
390
- * The specified grid size must be consistent with @a required_workgroup_size
391
- * and @a required_dim. Also, the product of the three dimensions must not
392
- * exceed @a max_flat_grid_size. Note that the listed invariants must hold
393
- * only if all the corresponding control directives are enabled.
394
- *
395
- * If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
396
- * control_directives_mask, the three dimension values must be greater than 0.
397
- */
398
- uint64_t required_grid_size[3];
399
- /**
400
- * Work-group size that will be used by the application in any dispatch of the
401
- * kernel. If the kernel contains a requiredworkgroupsize control directive,
402
- * the dimensions should match.
403
- *
404
- * The specified work-group size must be consistent with @a required_grid_size
405
- * and @a required_dim. Also, the product of the three dimensions must not
406
- * exceed @a max_flat_workgroup_size. Note that the listed invariants must
407
- * hold only if all the corresponding control directives are enabled.
408
- *
409
- * If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
410
- * control_directives_mask, the three dimension values must be greater than 0.
411
- */
412
- hsa_dim3_t required_workgroup_size;
413
- /**
414
- * Number of dimensions that will be used by the application to launch the
415
- * kernel. If the kernel contains a requireddim control directive, the two
416
- * values should match.
417
- *
418
- * The specified dimensions must be consistent with @a required_grid_size and
419
- * @a required_workgroup_size. This invariant must hold only if all the
420
- * corresponding control directives are enabled.
421
- *
422
- * If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
423
- * control_directives_mask, this field must be 1, 2, or 3.
424
- */
425
- uint8_t required_dim;
426
- /**
427
- * Reserved. Must be 0.
428
- */
429
- uint8_t reserved2[75];
430
- } hsa_ext_control_directives_t;
431
-
432
- /**
433
- * @brief Finalize an HSAIL program for a given instruction set architecture.
434
- *
435
- * @details Finalize all of the kernels and indirect functions that belong to
436
- * the same HSAIL program for a specific instruction set architecture (ISA). The
437
- * transitive closure of all functions specified by call or scall must be
438
- * defined. Kernels and indirect functions that are being finalized must be
439
- * defined. Kernels and indirect functions that are referenced in kernels and
440
- * indirect functions being finalized may or may not be defined, but must be
441
- * declared. All the global/readonly segment variables that are referenced in
442
- * kernels and indirect functions being finalized may or may not be defined, but
443
- * must be declared.
444
- *
445
- * @param[in] program HSAIL program.
446
- *
447
- * @param[in] isa Instruction set architecture to finalize for.
448
- *
449
- * @param[in] call_convention A call convention used in a finalization. Must
450
- * have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
451
- * and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
452
- * isa (not inclusive).
453
- *
454
- * @param[in] control_directives Low-level control directives that influence
455
- * the finalization process.
456
- *
457
- * @param[in] options Vendor-specific options. May be NULL.
458
- *
459
- * @param[in] code_object_type Type of code object to produce.
460
- *
461
- * @param[out] code_object Code object generated by the Finalizer, which
462
- * contains the machine code for the kernels and indirect functions in the HSAIL
463
- * program. The code object is independent of the HSAIL module that was used to
464
- * generate it.
465
- *
466
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
467
- *
468
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
469
- * initialized.
470
- *
471
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
472
- * resources required for the operation.
473
- *
474
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
475
- * invalid.
476
- *
477
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
478
- *
479
- * @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
480
- * the control directive structure and in the HSAIL kernel mismatch, or if the
481
- * same directive is used with a different value in one of the functions used by
482
- * this kernel.
483
- *
484
- * @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
485
- * encountered an error while compiling a kernel or an indirect function.
486
- */
487
- hsa_status_t HSA_API hsa_ext_program_finalize(
488
- hsa_ext_program_t program,
489
- hsa_isa_t isa,
490
- int32_t call_convention,
491
- hsa_ext_control_directives_t control_directives,
492
- const char *options,
493
- hsa_code_object_type_t code_object_type,
494
- hsa_code_object_t *code_object);
495
-
496
- /** @} */
497
-
498
- #define hsa_ext_finalizer_1_00
499
-
500
- typedef struct hsa_ext_finalizer_1_00_pfn_s {
501
- hsa_status_t (*hsa_ext_program_create)(
502
- hsa_machine_model_t machine_model, hsa_profile_t profile,
503
- hsa_default_float_rounding_mode_t default_float_rounding_mode,
504
- const char *options, hsa_ext_program_t *program);
505
-
506
- hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
507
-
508
- hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
509
- hsa_ext_module_t module);
510
-
511
- hsa_status_t (*hsa_ext_program_iterate_modules)(
512
- hsa_ext_program_t program,
513
- hsa_status_t (*callback)(hsa_ext_program_t program,
514
- hsa_ext_module_t module, void *data),
515
- void *data);
516
-
517
- hsa_status_t (*hsa_ext_program_get_info)(
518
- hsa_ext_program_t program, hsa_ext_program_info_t attribute,
519
- void *value);
520
-
521
- hsa_status_t (*hsa_ext_program_finalize)(
522
- hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
523
- hsa_ext_control_directives_t control_directives, const char *options,
524
- hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
525
- } hsa_ext_finalizer_1_00_pfn_t;
526
-
527
- #ifdef __cplusplus
528
- } // extern "C" block
529
- #endif // __cplusplus
530
-
531
- #endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_