triton-windows 3.3.1.post19__cp311-cp311-win_amd64.whl → 3.5.0.post21__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (225)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +11 -2
  3. triton/_filecheck.py +97 -0
  4. triton/_internal_testing.py +95 -18
  5. triton/_utils.py +112 -21
  6. triton/backends/__init__.py +20 -23
  7. triton/backends/amd/__init__.py +0 -0
  8. triton/backends/amd/compiler.py +161 -119
  9. triton/backends/amd/driver.c +118 -46
  10. triton/backends/amd/driver.py +274 -96
  11. triton/backends/compiler.py +7 -21
  12. triton/backends/driver.py +13 -0
  13. triton/backends/nvidia/bin/ptxas.exe +0 -0
  14. triton/backends/nvidia/compiler.py +163 -106
  15. triton/backends/nvidia/driver.c +166 -101
  16. triton/backends/nvidia/driver.py +384 -202
  17. triton/compiler/__init__.py +5 -2
  18. triton/compiler/code_generator.py +439 -231
  19. triton/compiler/compiler.py +152 -84
  20. triton/experimental/__init__.py +0 -0
  21. triton/experimental/gluon/__init__.py +5 -0
  22. triton/experimental/gluon/_compiler.py +0 -0
  23. triton/experimental/gluon/_runtime.py +102 -0
  24. triton/experimental/gluon/language/__init__.py +119 -0
  25. triton/experimental/gluon/language/_core.py +490 -0
  26. triton/experimental/gluon/language/_layouts.py +583 -0
  27. triton/experimental/gluon/language/_math.py +20 -0
  28. triton/experimental/gluon/language/_semantic.py +380 -0
  29. triton/experimental/gluon/language/_standard.py +80 -0
  30. triton/experimental/gluon/language/amd/__init__.py +4 -0
  31. triton/experimental/gluon/language/amd/_layouts.py +96 -0
  32. triton/experimental/gluon/language/amd/cdna3/__init__.py +100 -0
  33. triton/experimental/gluon/language/amd/cdna4/__init__.py +48 -0
  34. triton/experimental/gluon/language/amd/cdna4/async_copy.py +151 -0
  35. triton/experimental/gluon/language/extra/__init__.py +3 -0
  36. triton/experimental/gluon/language/nvidia/__init__.py +4 -0
  37. triton/experimental/gluon/language/nvidia/ampere/__init__.py +3 -0
  38. triton/experimental/gluon/language/nvidia/ampere/async_copy.py +74 -0
  39. triton/experimental/gluon/language/nvidia/ampere/mbarrier.py +80 -0
  40. triton/experimental/gluon/language/nvidia/blackwell/__init__.py +387 -0
  41. triton/experimental/gluon/language/nvidia/blackwell/tma.py +52 -0
  42. triton/experimental/gluon/language/nvidia/hopper/__init__.py +132 -0
  43. triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +34 -0
  44. triton/experimental/gluon/language/nvidia/hopper/tma.py +97 -0
  45. triton/experimental/gluon/nvidia/__init__.py +4 -0
  46. triton/experimental/gluon/nvidia/blackwell.py +3 -0
  47. triton/experimental/gluon/nvidia/hopper.py +45 -0
  48. triton/knobs.py +546 -0
  49. triton/language/__init__.py +50 -19
  50. triton/language/core.py +909 -572
  51. triton/language/extra/cuda/__init__.py +10 -7
  52. triton/language/extra/cuda/gdc.py +42 -0
  53. triton/language/extra/cuda/libdevice.py +394 -394
  54. triton/language/extra/cuda/utils.py +21 -21
  55. triton/language/extra/hip/__init__.py +3 -1
  56. triton/language/extra/hip/libdevice.py +120 -104
  57. triton/language/extra/hip/utils.py +35 -0
  58. triton/language/extra/libdevice.py +4 -0
  59. triton/language/math.py +65 -66
  60. triton/language/random.py +12 -2
  61. triton/language/semantic.py +1757 -1768
  62. triton/language/standard.py +127 -62
  63. triton/language/target_info.py +54 -0
  64. triton/runtime/_allocation.py +15 -3
  65. triton/runtime/_async_compile.py +55 -0
  66. triton/runtime/autotuner.py +117 -60
  67. triton/runtime/build.py +83 -17
  68. triton/runtime/cache.py +61 -47
  69. triton/runtime/driver.py +25 -47
  70. triton/runtime/interpreter.py +95 -50
  71. triton/runtime/jit.py +445 -248
  72. triton/runtime/tcc/include/_mingw.h +8 -10
  73. triton/runtime/tcc/include/assert.h +5 -0
  74. triton/runtime/tcc/include/errno.h +1 -1
  75. triton/runtime/tcc/include/float.h +21 -3
  76. triton/runtime/tcc/include/iso646.h +36 -0
  77. triton/runtime/tcc/include/limits.h +5 -0
  78. triton/runtime/tcc/include/malloc.h +2 -2
  79. triton/runtime/tcc/include/math.h +21 -261
  80. triton/runtime/tcc/include/stdalign.h +16 -0
  81. triton/runtime/tcc/include/stdarg.h +5 -70
  82. triton/runtime/tcc/include/stdatomic.h +171 -0
  83. triton/runtime/tcc/include/stddef.h +7 -19
  84. triton/runtime/tcc/include/stdlib.h +15 -4
  85. triton/runtime/tcc/include/stdnoreturn.h +7 -0
  86. triton/runtime/tcc/include/sys/stat.h +2 -2
  87. triton/runtime/tcc/include/sys/types.h +5 -0
  88. triton/runtime/tcc/include/tcc/tcc_libm.h +444 -27
  89. triton/runtime/tcc/include/tccdefs.h +342 -0
  90. triton/runtime/tcc/include/tgmath.h +89 -0
  91. triton/runtime/tcc/include/uchar.h +33 -0
  92. triton/runtime/tcc/include/unistd.h +1 -0
  93. triton/runtime/tcc/include/winapi/qos.h +72 -0
  94. triton/runtime/tcc/include/winapi/shellapi.h +59 -0
  95. triton/runtime/tcc/include/winapi/winbase.h +9 -2
  96. triton/runtime/tcc/include/winapi/wincon.h +8 -0
  97. triton/runtime/tcc/include/winapi/windows.h +1 -1
  98. triton/runtime/tcc/include/winapi/winnls.h +778 -0
  99. triton/runtime/tcc/include/winapi/winnt.h +9 -7
  100. triton/runtime/tcc/include/winapi/winsock2.h +1474 -0
  101. triton/runtime/tcc/include/winapi/ws2ipdef.h +21 -0
  102. triton/runtime/tcc/include/winapi/ws2tcpip.h +391 -0
  103. triton/runtime/tcc/lib/libtcc1.a +0 -0
  104. triton/runtime/tcc/lib/python314.def +1800 -0
  105. triton/runtime/tcc/lib/python314t.def +1809 -0
  106. triton/runtime/tcc/libtcc.dll +0 -0
  107. triton/runtime/tcc/tcc.exe +0 -0
  108. triton/testing.py +16 -12
  109. triton/tools/compile.py +62 -14
  110. triton/tools/disasm.py +3 -4
  111. triton/tools/extra/cuda/compile.c +1 -0
  112. triton/tools/extra/hip/compile.cpp +66 -0
  113. triton/tools/extra/hip/compile.h +13 -0
  114. triton/tools/ragged_tma.py +92 -0
  115. triton/tools/tensor_descriptor.py +34 -0
  116. triton/windows_utils.py +52 -81
  117. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/METADATA +8 -4
  118. triton_windows-3.5.0.post21.dist-info/RECORD +217 -0
  119. triton_windows-3.5.0.post21.dist-info/entry_points.txt +3 -0
  120. triton_windows-3.5.0.post21.dist-info/licenses/LICENSE +23 -0
  121. triton_windows-3.5.0.post21.dist-info/top_level.txt +1 -0
  122. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  123. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
  124. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
  125. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
  126. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  127. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  128. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  129. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
  130. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  131. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
  132. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  133. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  134. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  135. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  136. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  137. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  138. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  139. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  140. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
  141. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
  142. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  143. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  144. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  145. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  146. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  147. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  148. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  149. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
  150. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  151. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  152. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  153. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  154. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  155. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
  156. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
  157. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
  158. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  159. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  160. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  161. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  162. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  163. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  164. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  165. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  166. triton/backends/amd/include/hip/device_functions.h +0 -38
  167. triton/backends/amd/include/hip/driver_types.h +0 -468
  168. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  169. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  170. triton/backends/amd/include/hip/hip_common.h +0 -100
  171. triton/backends/amd/include/hip/hip_complex.h +0 -38
  172. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  173. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  174. triton/backends/amd/include/hip/hip_ext.h +0 -161
  175. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  176. triton/backends/amd/include/hip/hip_fp8.h +0 -33
  177. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  178. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  179. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  180. triton/backends/amd/include/hip/hip_profile.h +0 -27
  181. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  182. triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
  183. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  184. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  185. triton/backends/amd/include/hip/hip_version.h +0 -17
  186. triton/backends/amd/include/hip/hiprtc.h +0 -421
  187. triton/backends/amd/include/hip/library_types.h +0 -78
  188. triton/backends/amd/include/hip/math_functions.h +0 -42
  189. triton/backends/amd/include/hip/surface_types.h +0 -63
  190. triton/backends/amd/include/hip/texture_types.h +0 -194
  191. triton/backends/amd/include/hsa/Brig.h +0 -1131
  192. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  193. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
  194. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  195. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  196. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  197. triton/backends/amd/include/hsa/hsa.h +0 -5738
  198. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  199. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
  200. triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
  201. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
  202. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  203. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  204. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  205. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  206. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
  207. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  208. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
  209. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
  210. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
  211. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  212. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  213. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  214. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  215. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  216. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  217. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  218. triton/backends/amd/include/roctracer/roctx.h +0 -229
  219. triton/language/_utils.py +0 -21
  220. triton/language/extra/cuda/_experimental_tma.py +0 -106
  221. triton/runtime/tcc/lib/libtcc1-64.a +0 -0
  222. triton/tools/experimental_descriptor.py +0 -32
  223. triton_windows-3.3.1.post19.dist-info/RECORD +0 -260
  224. triton_windows-3.3.1.post19.dist-info/top_level.txt +0 -14
  225. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/WHEEL +0 -0
@@ -1,108 +0,0 @@ triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h
1
- /*
2
- Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
3
-
4
- Permission is hereby granted, free of charge, to any person obtaining a copy
5
- of this software and associated documentation files (the "Software"), to deal
6
- in the Software without restriction, including without limitation the rights
7
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
- copies of the Software, and to permit persons to whom the Software is
9
- furnished to do so, subject to the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be included in
12
- all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
- THE SOFTWARE.
21
- */
22
-
23
- #ifndef HIP_INCLUDE_AMD_HIP_GL_INTEROP_H
24
- #define HIP_INCLUDE_AMD_HIP_GL_INTEROP_H
25
-
26
- #if defined(__cplusplus)
27
- extern "C" {
28
- #endif
29
-
30
- /**
31
- *
32
- * @addtogroup GlobalDefs
33
- * @{
34
- *
35
- */
36
-
37
- /**
38
- * HIP Devices used by current OpenGL Context.
39
- */
40
- typedef enum hipGLDeviceList {
41
- hipGLDeviceListAll = 1, ///< All hip devices used by current OpenGL context.
42
- hipGLDeviceListCurrentFrame = 2, ///< Hip devices used by current OpenGL context in current
43
- ///< frame
44
- hipGLDeviceListNextFrame = 3 ///< Hip devices used by current OpenGL context in next
45
- ///< frame.
46
- } hipGLDeviceList;
47
-
48
-
49
- /** GLuint as uint.*/
50
- typedef unsigned int GLuint;
51
- /** GLenum as uint.*/
52
- typedef unsigned int GLenum;
53
- /**
54
- * @}
55
- */
56
-
57
- /**
58
- * @ingroup GL
59
- * @{
60
- *
61
- */
62
- /**
63
- * @brief Queries devices associated with the current OpenGL context.
64
- *
65
- * @param [out] pHipDeviceCount - Pointer of number of devices on the current GL context.
66
- * @param [out] pHipDevices - Pointer of devices on the current OpenGL context.
67
- * @param [in] hipDeviceCount - Size of device.
68
- * @param [in] deviceList - The setting of devices. It could be either hipGLDeviceListCurrentFrame
69
- * for the devices used to render the current frame, or hipGLDeviceListAll for all devices.
70
- * The default setting is Invalid deviceList value.
71
- *
72
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported
73
- *
74
- */
75
- hipError_t hipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices,
76
- unsigned int hipDeviceCount, hipGLDeviceList deviceList);
77
- /**
78
- * @brief Registers a GL Buffer for interop and returns corresponding graphics resource.
79
- *
80
- * @param [out] resource - Returns pointer of graphics resource.
81
- * @param [in] buffer - Buffer to be registered.
82
- * @param [in] flags - Register flags.
83
- *
84
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorInvalidResourceHandle
85
- *
86
- */
87
- hipError_t hipGraphicsGLRegisterBuffer(hipGraphicsResource** resource, GLuint buffer,
88
- unsigned int flags);
89
- /**
90
- * @brief Register a GL Image for interop and returns the corresponding graphic resource.
91
- *
92
- * @param [out] resource - Returns pointer of graphics resource.
93
- * @param [in] image - Image to be registered.
94
- * @param [in] target - Valid target value Id.
95
- * @param [in] flags - Register flags.
96
- *
97
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorInvalidResourceHandle
98
- *
99
- */
100
- hipError_t hipGraphicsGLRegisterImage(hipGraphicsResource** resource, GLuint image,
101
- GLenum target, unsigned int flags);
102
- /**
103
- * @}
104
- */
105
- #if defined(__cplusplus)
106
- }
107
- #endif /* __cplusplus */
108
- #endif /* HIP_INCLUDE_AMD_HIP_GL_INTEROP_H */
@@ -1,124 +0,0 @@ triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h
1
- /*
2
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
3
-
4
- Permission is hereby granted, free of charge, to any person obtaining a copy
5
- of this software and associated documentation files (the "Software"), to deal
6
- in the Software without restriction, including without limitation the rights
7
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
- copies of the Software, and to permit persons to whom the Software is
9
- furnished to do so, subject to the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be included in
12
- all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
- THE SOFTWARE.
21
- */
22
- #ifndef AMD_HIP_MATH_CONSTANTS_H
23
- #define AMD_HIP_MATH_CONSTANTS_H
24
-
25
- // single precision constants
26
- #define HIP_INF_F __int_as_float(0x7f800000U)
27
- #define HIP_NAN_F __int_as_float(0x7fffffffU)
28
- #define HIP_MIN_DENORM_F __int_as_float(0x00000001U)
29
- #define HIP_MAX_NORMAL_F __int_as_float(0x7f7fffffU)
30
- #define HIP_NEG_ZERO_F __int_as_float(0x80000000U)
31
- #define HIP_ZERO_F 0.0F
32
- #define HIP_ONE_F 1.0F
33
- #define HIP_SQRT_HALF_F 0.707106781F
34
- #define HIP_SQRT_HALF_HI_F 0.707106781F
35
- #define HIP_SQRT_HALF_LO_F 1.210161749e-08F
36
- #define HIP_SQRT_TWO_F 1.414213562F
37
- #define HIP_THIRD_F 0.333333333F
38
- #define HIP_PIO4_F 0.785398163F
39
- #define HIP_PIO2_F 1.570796327F
40
- #define HIP_3PIO4_F 2.356194490F
41
- #define HIP_2_OVER_PI_F 0.636619772F
42
- #define HIP_SQRT_2_OVER_PI_F 0.797884561F
43
- #define HIP_PI_F 3.141592654F
44
- #define HIP_L2E_F 1.442695041F
45
- #define HIP_L2T_F 3.321928094F
46
- #define HIP_LG2_F 0.301029996F
47
- #define HIP_LGE_F 0.434294482F
48
- #define HIP_LN2_F 0.693147181F
49
- #define HIP_LNT_F 2.302585093F
50
- #define HIP_LNPI_F 1.144729886F
51
- #define HIP_TWO_TO_M126_F 1.175494351e-38F
52
- #define HIP_TWO_TO_126_F 8.507059173e37F
53
- #define HIP_NORM_HUGE_F 3.402823466e38F
54
- #define HIP_TWO_TO_23_F 8388608.0F
55
- #define HIP_TWO_TO_24_F 16777216.0F
56
- #define HIP_TWO_TO_31_F 2147483648.0F
57
- #define HIP_TWO_TO_32_F 4294967296.0F
58
- #define HIP_REMQUO_BITS_F 3U
59
- #define HIP_REMQUO_MASK_F (~((~0U)<<HIP_REMQUO_BITS_F))
60
- #define HIP_TRIG_PLOSS_F 105615.0F
61
-
62
- // double precision constants
63
- #define HIP_INF __longlong_as_double(0x7ff0000000000000ULL)
64
- #define HIP_NAN __longlong_as_double(0xfff8000000000000ULL)
65
- #define HIP_NEG_ZERO __longlong_as_double(0x8000000000000000ULL)
66
- #define HIP_MIN_DENORM __longlong_as_double(0x0000000000000001ULL)
67
- #define HIP_ZERO 0.0
68
- #define HIP_ONE 1.0
69
- #define HIP_SQRT_TWO 1.4142135623730951e+0
70
- #define HIP_SQRT_HALF 7.0710678118654757e-1
71
- #define HIP_SQRT_HALF_HI 7.0710678118654757e-1
72
- #define HIP_SQRT_HALF_LO (-4.8336466567264567e-17)
73
- #define HIP_THIRD 3.3333333333333333e-1
74
- #define HIP_TWOTHIRD 6.6666666666666667e-1
75
- #define HIP_PIO4 7.8539816339744828e-1
76
- #define HIP_PIO4_HI 7.8539816339744828e-1
77
- #define HIP_PIO4_LO 3.0616169978683830e-17
78
- #define HIP_PIO2 1.5707963267948966e+0
79
- #define HIP_PIO2_HI 1.5707963267948966e+0
80
- #define HIP_PIO2_LO 6.1232339957367660e-17
81
- #define HIP_3PIO4 2.3561944901923448e+0
82
- #define HIP_2_OVER_PI 6.3661977236758138e-1
83
- #define HIP_PI 3.1415926535897931e+0
84
- #define HIP_PI_HI 3.1415926535897931e+0
85
- #define HIP_PI_LO 1.2246467991473532e-16
86
- #define HIP_SQRT_2PI 2.5066282746310007e+0
87
- #define HIP_SQRT_2PI_HI 2.5066282746310007e+0
88
- #define HIP_SQRT_2PI_LO (-1.8328579980459167e-16)
89
- #define HIP_SQRT_PIO2 1.2533141373155003e+0
90
- #define HIP_SQRT_PIO2_HI 1.2533141373155003e+0
91
- #define HIP_SQRT_PIO2_LO (-9.1642899902295834e-17)
92
- #define HIP_SQRT_2OPI 7.9788456080286536e-1
93
- #define HIP_L2E 1.4426950408889634e+0
94
- #define HIP_L2E_HI 1.4426950408889634e+0
95
- #define HIP_L2E_LO 2.0355273740931033e-17
96
- #define HIP_L2T 3.3219280948873622e+0
97
- #define HIP_LG2 3.0102999566398120e-1
98
- #define HIP_LG2_HI 3.0102999566398120e-1
99
- #define HIP_LG2_LO (-2.8037281277851704e-18)
100
- #define HIP_LGE 4.3429448190325182e-1
101
- #define HIP_LGE_HI 4.3429448190325182e-1
102
- #define HIP_LGE_LO 1.09831965021676510e-17
103
- #define HIP_LN2 6.9314718055994529e-1
104
- #define HIP_LN2_HI 6.9314718055994529e-1
105
- #define HIP_LN2_LO 2.3190468138462996e-17
106
- #define HIP_LNT 2.3025850929940459e+0
107
- #define HIP_LNT_HI 2.3025850929940459e+0
108
- #define HIP_LNT_LO (-2.1707562233822494e-16)
109
- #define HIP_LNPI 1.1447298858494002e+0
110
- #define HIP_LN2_X_1024 7.0978271289338397e+2
111
- #define HIP_LN2_X_1025 7.1047586007394398e+2
112
- #define HIP_LN2_X_1075 7.4513321910194122e+2
113
- #define HIP_LG2_X_1024 3.0825471555991675e+2
114
- #define HIP_LG2_X_1075 3.2360724533877976e+2
115
- #define HIP_TWO_TO_23 8388608.0
116
- #define HIP_TWO_TO_52 4503599627370496.0
117
- #define HIP_TWO_TO_53 9007199254740992.0
118
- #define HIP_TWO_TO_54 18014398509481984.0
119
- #define HIP_TWO_TO_M54 5.5511151231257827e-17
120
- #define HIP_TWO_TO_M1022 2.22507385850720140e-308
121
- #define HIP_TRIG_PLOSS 2147483648.0
122
- #define HIP_DBL2INT_CVT 6755399441055744.0
123
-
124
- #endif
@@ -1,405 +0,0 @@ triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h
1
- /*
2
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
3
-
4
- Permission is hereby granted, free of charge, to any person obtaining a copy
5
- of this software and associated documentation files (the "Software"), to deal
6
- in the Software without restriction, including without limitation the rights
7
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
- copies of the Software, and to permit persons to whom the Software is
9
- furnished to do so, subject to the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be included in
12
- all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
- THE SOFTWARE.
21
- */
22
-
23
- /**
24
- * @file amd_detail/hip_runtime.h
25
- * @brief Contains definitions of APIs for HIP runtime.
26
- */
27
-
28
- //#pragma once
29
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_H
30
- #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_H
31
-
32
- #include <hip/amd_detail/amd_hip_common.h>
33
-
34
- #if !defined(__HIPCC_RTC__)
35
- #ifdef __cplusplus
36
- #include <cstddef>
37
- #else
38
- #include <stddef.h>
39
- #endif // __cplusplus
40
- #endif // !defined(__HIPCC_RTC__)
41
-
42
- #ifdef __cplusplus
43
- extern "C" {
44
- #endif
45
-
46
- /**
47
- * @brief Query the installed library build name.
48
- *
49
- * This function can be used even when the library is not initialized.
50
- *
51
- * @returns Returns a string describing the build version of the library. The
52
- * string is owned by the library.
53
- */
54
- const char* amd_dbgapi_get_build_name();
55
-
56
- /**
57
- * @brief Query the installed library git hash.
58
- *
59
- * This function can be used even when the library is not initialized.
60
- *
61
- * @returns Returns git hash of the library.
62
- */
63
- const char* amd_dbgapi_get_git_hash();
64
-
65
- /**
66
- * @brief Query the installed library build ID.
67
- *
68
- * This function can be used even when the library is not initialized.
69
- *
70
- * @returns Returns build ID of the library.
71
- */
72
- size_t amd_dbgapi_get_build_id();
73
-
74
- #ifdef __cplusplus
75
- } /* extern "c" */
76
- #endif
77
-
78
- //---
79
- // Top part of file can be compiled with any compiler
80
-
81
- #if !defined(__HIPCC_RTC__)
82
- #ifdef __cplusplus
83
- #include <cmath>
84
- #include <cstdint>
85
- #include <tuple>
86
- #else
87
- #include <math.h>
88
- #include <stdint.h>
89
- #endif // __cplusplus
90
- #else
91
- #if !__HIP_NO_STD_DEFS__
92
- typedef unsigned int uint32_t;
93
- typedef unsigned long long uint64_t;
94
- typedef signed int int32_t;
95
- typedef signed long long int64_t;
96
- namespace std {
97
- using ::uint32_t;
98
- using ::uint64_t;
99
- using ::int32_t;
100
- using ::int64_t;
101
- }
102
- #endif // __HIP_NO_STD_DEFS__
103
- #endif // !defined(__HIPCC_RTC__)
104
-
105
- #if __HIP_CLANG_ONLY__
106
-
107
- #if !defined(__align__)
108
- #define __align__(x) __attribute__((aligned(x)))
109
- #endif
110
-
111
- #define CUDA_SUCCESS hipSuccess
112
-
113
- #if !defined(__HIPCC_RTC__)
114
- #include <hip/hip_runtime_api.h>
115
- #include <hip/amd_detail/amd_hip_atomic.h>
116
- #include <hip/amd_detail/amd_device_functions.h>
117
- #include <hip/amd_detail/amd_surface_functions.h>
118
- #include <hip/amd_detail/texture_fetch_functions.h>
119
- #include <hip/amd_detail/texture_indirect_functions.h>
120
- extern int HIP_TRACE_API;
121
- #endif // !defined(__HIPCC_RTC__)
122
-
123
- #ifdef __cplusplus
124
- #include <hip/amd_detail/hip_ldg.h>
125
- #endif
126
-
127
- #include <hip/amd_detail/host_defines.h>
128
-
129
- // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
130
- #if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
131
- #define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
132
- #endif
133
-
134
- // Feature tests:
135
- #if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
136
- // Device compile and not host compile:
137
-
138
- // 32-bit Atomics:
139
- #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
140
- #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
141
- #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
142
- #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
143
- #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)
144
-
145
- // 64-bit Atomics:
146
- #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
147
- #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (1)
148
-
149
- // Doubles
150
- #define __HIP_ARCH_HAS_DOUBLES__ (1)
151
-
152
- // warp cross-lane operations:
153
- #define __HIP_ARCH_HAS_WARP_VOTE__ (1)
154
- #define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
155
- #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
156
- #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
157
-
158
- // sync
159
- #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1)
160
- #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
161
-
162
- // misc
163
- #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
164
- #define __HIP_ARCH_HAS_3DGRID__ (1)
165
- #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
166
-
167
- #endif /* Device feature flags */
168
-
169
-
170
- #define launch_bounds_impl0(requiredMaxThreadsPerBlock) \
171
- __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))
172
- #define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \
173
- __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \
174
- amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))
175
- #define select_impl_(_1, _2, impl_, ...) impl_
176
- #define __launch_bounds__(...) \
177
- select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0, )(__VA_ARGS__)
178
-
179
- #if !defined(__HIPCC_RTC__)
180
- __host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
181
- #endif // !defined(__HIPCC_RTC__)
182
-
183
- // End doxygen API:
184
- /**
185
- * @}
186
- */
187
-
188
- //
189
- // hip-clang functions
190
- //
191
- #if !defined(__HIPCC_RTC__)
192
- #define HIP_KERNEL_NAME(...) __VA_ARGS__
193
- #define HIP_SYMBOL(X) X
194
-
195
- typedef int hipLaunchParm;
196
-
197
- template <std::size_t n, typename... Ts,
198
- typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
199
- void pArgs(const std::tuple<Ts...>&, void*) {}
200
-
201
- template <std::size_t n, typename... Ts,
202
- typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
203
- void pArgs(const std::tuple<Ts...>& formals, void** _vargs) {
204
- using T = typename std::tuple_element<n, std::tuple<Ts...> >::type;
205
-
206
- static_assert(!std::is_reference<T>{},
207
- "A __global__ function cannot have a reference as one of its "
208
- "arguments.");
209
- #if defined(HIP_STRICT)
210
- static_assert(std::is_trivially_copyable<T>{},
211
- "Only TriviallyCopyable types can be arguments to a __global__ "
212
- "function");
213
- #endif
214
- _vargs[n] = const_cast<void*>(reinterpret_cast<const void*>(&std::get<n>(formals)));
215
- return pArgs<n + 1>(formals, _vargs);
216
- }
217
-
218
- template <typename... Formals, typename... Actuals>
219
- std::tuple<Formals...> validateArgsCountType(void (*kernel)(Formals...), std::tuple<Actuals...>(actuals)) {
220
- static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch");
221
- std::tuple<Formals...> to_formals{std::move(actuals)};
222
- return to_formals;
223
- }
224
-
225
- #if defined(HIP_TEMPLATE_KERNEL_LAUNCH)
226
- template <typename... Args, typename F = void (*)(Args...)>
227
- void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
228
- std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) {
229
- constexpr size_t count = sizeof...(Args);
230
- auto tup_ = std::tuple<Args...>{args...};
231
- auto tup = validateArgsCountType(kernel, tup_);
232
- void* _Args[count];
233
- pArgs<0>(tup, _Args);
234
-
235
- auto k = reinterpret_cast<void*>(kernel);
236
- hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream);
237
- }
238
- #else
239
- #define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \
240
- do { \
241
- kernelName<<<(numBlocks), (numThreads), (memPerBlock), (streamId)>>>(__VA_ARGS__); \
242
- } while (0)
243
-
244
- #define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
245
- #endif
246
-
247
- #include <hip/hip_runtime_api.h>
248
- #endif // !defined(__HIPCC_RTC__)
249
-
250
- #if defined(__HIPCC_RTC__)
251
- typedef struct dim3 {
252
- uint32_t x; ///< x
253
- uint32_t y; ///< y
254
- uint32_t z; ///< z
255
- #ifdef __cplusplus
256
- constexpr __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
257
- #endif
258
- } dim3;
259
- #endif // !defined(__HIPCC_RTC__)
260
-
261
- #pragma push_macro("__DEVICE__")
262
- #define __DEVICE__ static __device__ __forceinline__
263
-
264
- extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(unsigned int);
265
- __DEVICE__ unsigned int __hip_get_thread_idx_x() { return __ockl_get_local_id(0); }
266
- __DEVICE__ unsigned int __hip_get_thread_idx_y() { return __ockl_get_local_id(1); }
267
- __DEVICE__ unsigned int __hip_get_thread_idx_z() { return __ockl_get_local_id(2); }
268
-
269
- extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(unsigned int);
270
- __DEVICE__ unsigned int __hip_get_block_idx_x() { return __ockl_get_group_id(0); }
271
- __DEVICE__ unsigned int __hip_get_block_idx_y() { return __ockl_get_group_id(1); }
272
- __DEVICE__ unsigned int __hip_get_block_idx_z() { return __ockl_get_group_id(2); }
273
-
274
- extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(unsigned int);
275
- __DEVICE__ unsigned int __hip_get_block_dim_x() { return __ockl_get_local_size(0); }
276
- __DEVICE__ unsigned int __hip_get_block_dim_y() { return __ockl_get_local_size(1); }
277
- __DEVICE__ unsigned int __hip_get_block_dim_z() { return __ockl_get_local_size(2); }
278
-
279
- extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(unsigned int);
280
- __DEVICE__ unsigned int __hip_get_grid_dim_x() { return __ockl_get_num_groups(0); }
281
- __DEVICE__ unsigned int __hip_get_grid_dim_y() { return __ockl_get_num_groups(1); }
282
- __DEVICE__ unsigned int __hip_get_grid_dim_z() { return __ockl_get_num_groups(2); }
283
-
284
- #define __HIP_DEVICE_BUILTIN(DIMENSION, FUNCTION) \
285
- __declspec(property(get = __get_##DIMENSION)) unsigned int DIMENSION; \
286
- __DEVICE__ unsigned int __get_##DIMENSION(void) { \
287
- return FUNCTION; \
288
- }
289
-
290
- struct __hip_builtin_threadIdx_t {
291
- __HIP_DEVICE_BUILTIN(x,__hip_get_thread_idx_x());
292
- __HIP_DEVICE_BUILTIN(y,__hip_get_thread_idx_y());
293
- __HIP_DEVICE_BUILTIN(z,__hip_get_thread_idx_z());
294
- #ifdef __cplusplus
295
- __device__ operator dim3() const { return dim3(x, y, z); }
296
- #endif
297
- };
298
-
299
- struct __hip_builtin_blockIdx_t {
300
- __HIP_DEVICE_BUILTIN(x,__hip_get_block_idx_x());
301
- __HIP_DEVICE_BUILTIN(y,__hip_get_block_idx_y());
302
- __HIP_DEVICE_BUILTIN(z,__hip_get_block_idx_z());
303
- #ifdef __cplusplus
304
- __device__ operator dim3() const { return dim3(x, y, z); }
305
- #endif
306
- };
307
-
308
- struct __hip_builtin_blockDim_t {
309
- __HIP_DEVICE_BUILTIN(x,__hip_get_block_dim_x());
310
- __HIP_DEVICE_BUILTIN(y,__hip_get_block_dim_y());
311
- __HIP_DEVICE_BUILTIN(z,__hip_get_block_dim_z());
312
- #ifdef __cplusplus
313
- __device__ operator dim3() const { return dim3(x, y, z); }
314
- #endif
315
- };
316
-
317
- struct __hip_builtin_gridDim_t {
318
- __HIP_DEVICE_BUILTIN(x,__hip_get_grid_dim_x());
319
- __HIP_DEVICE_BUILTIN(y,__hip_get_grid_dim_y());
320
- __HIP_DEVICE_BUILTIN(z,__hip_get_grid_dim_z());
321
- #ifdef __cplusplus
322
- __device__ operator dim3() const { return dim3(x, y, z); }
323
- #endif
324
- };
325
-
326
- #undef __HIP_DEVICE_BUILTIN
327
- #pragma pop_macro("__DEVICE__")
328
-
329
- extern const __device__ __attribute__((weak)) __hip_builtin_threadIdx_t threadIdx;
330
- extern const __device__ __attribute__((weak)) __hip_builtin_blockIdx_t blockIdx;
331
- extern const __device__ __attribute__((weak)) __hip_builtin_blockDim_t blockDim;
332
- extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim;
333
-
334
- #define hipThreadIdx_x threadIdx.x
335
- #define hipThreadIdx_y threadIdx.y
336
- #define hipThreadIdx_z threadIdx.z
337
-
338
- #define hipBlockIdx_x blockIdx.x
339
- #define hipBlockIdx_y blockIdx.y
340
- #define hipBlockIdx_z blockIdx.z
341
-
342
- #define hipBlockDim_x blockDim.x
343
- #define hipBlockDim_y blockDim.y
344
- #define hipBlockDim_z blockDim.z
345
-
346
- #define hipGridDim_x gridDim.x
347
- #define hipGridDim_y gridDim.y
348
- #define hipGridDim_z gridDim.z
349
-
350
- #if !defined(__HIPCC_RTC__)
351
- #include <hip/amd_detail/amd_math_functions.h>
352
- #endif
353
-
354
- #if __HIP_HCC_COMPAT_MODE__
355
- // Define HCC work item functions in terms of HIP builtin variables.
356
- #pragma push_macro("__DEFINE_HCC_FUNC")
357
- #define __DEFINE_HCC_FUNC(hc_fun,hip_var) \
358
- inline __device__ __attribute__((always_inline)) unsigned int hc_get_##hc_fun(unsigned int i) { \
359
- if (i==0) \
360
- return hip_var.x; \
361
- else if(i==1) \
362
- return hip_var.y; \
363
- else \
364
- return hip_var.z; \
365
- }
366
-
367
- __DEFINE_HCC_FUNC(workitem_id, threadIdx)
368
- __DEFINE_HCC_FUNC(group_id, blockIdx)
369
- __DEFINE_HCC_FUNC(group_size, blockDim)
370
- __DEFINE_HCC_FUNC(num_groups, gridDim)
371
- #pragma pop_macro("__DEFINE_HCC_FUNC")
372
-
373
- extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(unsigned int);
374
- inline __device__ __attribute__((always_inline)) unsigned int
375
- hc_get_workitem_absolute_id(int dim)
376
- {
377
- return (unsigned int)__ockl_get_global_id(dim);
378
- }
379
-
380
- #endif
381
-
382
- #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
383
- #if !defined(__HIPCC_RTC__)
384
- // Support std::complex.
385
- #if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
386
- #pragma push_macro("__CUDA__")
387
- #define __CUDA__
388
- #include <__clang_cuda_math_forward_declares.h>
389
- #include <__clang_cuda_complex_builtins.h>
390
- // Workaround for using libc++ with HIP-Clang.
391
- // The following headers requires clang include path before standard C++ include path.
392
- // However libc++ include path requires to be before clang include path.
393
- // To workaround this, we pass -isystem with the parent directory of clang include
394
- // path instead of the clang include path itself.
395
- #include <include/cuda_wrappers/algorithm>
396
- #include <include/cuda_wrappers/complex>
397
- #include <include/cuda_wrappers/new>
398
- #undef __CUDA__
399
- #pragma pop_macro("__CUDA__")
400
- #endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
401
- #endif // !defined(__HIPCC_RTC__)
402
- #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
403
- #endif // __HIP_CLANG_ONLY__
404
-
405
- #endif // HIP_AMD_DETAIL_RUNTIME_H