triton-windows 3.3.1.post19__cp310-cp310-win_amd64.whl → 3.3.1.post21__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/backends/amd/driver.py +6 -1
- triton/backends/nvidia/compiler.py +1 -3
- triton/backends/nvidia/driver.py +7 -3
- triton/runtime/autotuner.py +2 -2
- triton/runtime/build.py +5 -5
- triton/windows_utils.py +11 -4
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/METADATA +1 -1
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/RECORD +11 -108
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/WHEEL +0 -0
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/top_level.txt +0 -0
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
|
3
|
-
|
|
4
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
6
|
-
in the Software without restriction, including without limitation the rights
|
|
7
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
8
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
9
|
-
furnished to do so, subject to the following conditions:
|
|
10
|
-
|
|
11
|
-
The above copyright notice and this permission notice shall be included in
|
|
12
|
-
all copies or substantial portions of the Software.
|
|
13
|
-
|
|
14
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
20
|
-
THE SOFTWARE.
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
#ifndef HIP_INCLUDE_AMD_HIP_GL_INTEROP_H
|
|
24
|
-
#define HIP_INCLUDE_AMD_HIP_GL_INTEROP_H
|
|
25
|
-
|
|
26
|
-
#if defined(__cplusplus)
|
|
27
|
-
extern "C" {
|
|
28
|
-
#endif
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
*
|
|
32
|
-
* @addtogroup GlobalDefs
|
|
33
|
-
* @{
|
|
34
|
-
*
|
|
35
|
-
*/
|
|
36
|
-
|
|
37
|
-
/**
|
|
38
|
-
* HIP Devices used by current OpenGL Context.
|
|
39
|
-
*/
|
|
40
|
-
typedef enum hipGLDeviceList {
|
|
41
|
-
hipGLDeviceListAll = 1, ///< All hip devices used by current OpenGL context.
|
|
42
|
-
hipGLDeviceListCurrentFrame = 2, ///< Hip devices used by current OpenGL context in current
|
|
43
|
-
///< frame
|
|
44
|
-
hipGLDeviceListNextFrame = 3 ///< Hip devices used by current OpenGL context in next
|
|
45
|
-
///< frame.
|
|
46
|
-
} hipGLDeviceList;
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
/** GLuint as uint.*/
|
|
50
|
-
typedef unsigned int GLuint;
|
|
51
|
-
/** GLenum as uint.*/
|
|
52
|
-
typedef unsigned int GLenum;
|
|
53
|
-
/**
|
|
54
|
-
* @}
|
|
55
|
-
*/
|
|
56
|
-
|
|
57
|
-
/**
|
|
58
|
-
* @ingroup GL
|
|
59
|
-
* @{
|
|
60
|
-
*
|
|
61
|
-
*/
|
|
62
|
-
/**
|
|
63
|
-
* @brief Queries devices associated with the current OpenGL context.
|
|
64
|
-
*
|
|
65
|
-
* @param [out] pHipDeviceCount - Pointer of number of devices on the current GL context.
|
|
66
|
-
* @param [out] pHipDevices - Pointer of devices on the current OpenGL context.
|
|
67
|
-
* @param [in] hipDeviceCount - Size of device.
|
|
68
|
-
* @param [in] deviceList - The setting of devices. It could be either hipGLDeviceListCurrentFrame
|
|
69
|
-
* for the devices used to render the current frame, or hipGLDeviceListAll for all devices.
|
|
70
|
-
* The default setting is Invalid deviceList value.
|
|
71
|
-
*
|
|
72
|
-
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported
|
|
73
|
-
*
|
|
74
|
-
*/
|
|
75
|
-
hipError_t hipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices,
|
|
76
|
-
unsigned int hipDeviceCount, hipGLDeviceList deviceList);
|
|
77
|
-
/**
|
|
78
|
-
* @brief Registers a GL Buffer for interop and returns corresponding graphics resource.
|
|
79
|
-
*
|
|
80
|
-
* @param [out] resource - Returns pointer of graphics resource.
|
|
81
|
-
* @param [in] buffer - Buffer to be registered.
|
|
82
|
-
* @param [in] flags - Register flags.
|
|
83
|
-
*
|
|
84
|
-
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorInvalidResourceHandle
|
|
85
|
-
*
|
|
86
|
-
*/
|
|
87
|
-
hipError_t hipGraphicsGLRegisterBuffer(hipGraphicsResource** resource, GLuint buffer,
|
|
88
|
-
unsigned int flags);
|
|
89
|
-
/**
|
|
90
|
-
* @brief Register a GL Image for interop and returns the corresponding graphic resource.
|
|
91
|
-
*
|
|
92
|
-
* @param [out] resource - Returns pointer of graphics resource.
|
|
93
|
-
* @param [in] image - Image to be registered.
|
|
94
|
-
* @param [in] target - Valid target value Id.
|
|
95
|
-
* @param [in] flags - Register flags.
|
|
96
|
-
*
|
|
97
|
-
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorInvalidResourceHandle
|
|
98
|
-
*
|
|
99
|
-
*/
|
|
100
|
-
hipError_t hipGraphicsGLRegisterImage(hipGraphicsResource** resource, GLuint image,
|
|
101
|
-
GLenum target, unsigned int flags);
|
|
102
|
-
/**
|
|
103
|
-
* @}
|
|
104
|
-
*/
|
|
105
|
-
#if defined(__cplusplus)
|
|
106
|
-
}
|
|
107
|
-
#endif /* __cplusplus */
|
|
108
|
-
#endif /* HIP_INCLUDE_AMD_HIP_GL_INTEROP_H */
|
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
|
3
|
-
|
|
4
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
6
|
-
in the Software without restriction, including without limitation the rights
|
|
7
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
8
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
9
|
-
furnished to do so, subject to the following conditions:
|
|
10
|
-
|
|
11
|
-
The above copyright notice and this permission notice shall be included in
|
|
12
|
-
all copies or substantial portions of the Software.
|
|
13
|
-
|
|
14
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
20
|
-
THE SOFTWARE.
|
|
21
|
-
*/
|
|
22
|
-
#ifndef AMD_HIP_MATH_CONSTANTS_H
|
|
23
|
-
#define AMD_HIP_MATH_CONSTANTS_H
|
|
24
|
-
|
|
25
|
-
// single precision constants
|
|
26
|
-
#define HIP_INF_F __int_as_float(0x7f800000U)
|
|
27
|
-
#define HIP_NAN_F __int_as_float(0x7fffffffU)
|
|
28
|
-
#define HIP_MIN_DENORM_F __int_as_float(0x00000001U)
|
|
29
|
-
#define HIP_MAX_NORMAL_F __int_as_float(0x7f7fffffU)
|
|
30
|
-
#define HIP_NEG_ZERO_F __int_as_float(0x80000000U)
|
|
31
|
-
#define HIP_ZERO_F 0.0F
|
|
32
|
-
#define HIP_ONE_F 1.0F
|
|
33
|
-
#define HIP_SQRT_HALF_F 0.707106781F
|
|
34
|
-
#define HIP_SQRT_HALF_HI_F 0.707106781F
|
|
35
|
-
#define HIP_SQRT_HALF_LO_F 1.210161749e-08F
|
|
36
|
-
#define HIP_SQRT_TWO_F 1.414213562F
|
|
37
|
-
#define HIP_THIRD_F 0.333333333F
|
|
38
|
-
#define HIP_PIO4_F 0.785398163F
|
|
39
|
-
#define HIP_PIO2_F 1.570796327F
|
|
40
|
-
#define HIP_3PIO4_F 2.356194490F
|
|
41
|
-
#define HIP_2_OVER_PI_F 0.636619772F
|
|
42
|
-
#define HIP_SQRT_2_OVER_PI_F 0.797884561F
|
|
43
|
-
#define HIP_PI_F 3.141592654F
|
|
44
|
-
#define HIP_L2E_F 1.442695041F
|
|
45
|
-
#define HIP_L2T_F 3.321928094F
|
|
46
|
-
#define HIP_LG2_F 0.301029996F
|
|
47
|
-
#define HIP_LGE_F 0.434294482F
|
|
48
|
-
#define HIP_LN2_F 0.693147181F
|
|
49
|
-
#define HIP_LNT_F 2.302585093F
|
|
50
|
-
#define HIP_LNPI_F 1.144729886F
|
|
51
|
-
#define HIP_TWO_TO_M126_F 1.175494351e-38F
|
|
52
|
-
#define HIP_TWO_TO_126_F 8.507059173e37F
|
|
53
|
-
#define HIP_NORM_HUGE_F 3.402823466e38F
|
|
54
|
-
#define HIP_TWO_TO_23_F 8388608.0F
|
|
55
|
-
#define HIP_TWO_TO_24_F 16777216.0F
|
|
56
|
-
#define HIP_TWO_TO_31_F 2147483648.0F
|
|
57
|
-
#define HIP_TWO_TO_32_F 4294967296.0F
|
|
58
|
-
#define HIP_REMQUO_BITS_F 3U
|
|
59
|
-
#define HIP_REMQUO_MASK_F (~((~0U)<<HIP_REMQUO_BITS_F))
|
|
60
|
-
#define HIP_TRIG_PLOSS_F 105615.0F
|
|
61
|
-
|
|
62
|
-
// double precision constants
|
|
63
|
-
#define HIP_INF __longlong_as_double(0x7ff0000000000000ULL)
|
|
64
|
-
#define HIP_NAN __longlong_as_double(0xfff8000000000000ULL)
|
|
65
|
-
#define HIP_NEG_ZERO __longlong_as_double(0x8000000000000000ULL)
|
|
66
|
-
#define HIP_MIN_DENORM __longlong_as_double(0x0000000000000001ULL)
|
|
67
|
-
#define HIP_ZERO 0.0
|
|
68
|
-
#define HIP_ONE 1.0
|
|
69
|
-
#define HIP_SQRT_TWO 1.4142135623730951e+0
|
|
70
|
-
#define HIP_SQRT_HALF 7.0710678118654757e-1
|
|
71
|
-
#define HIP_SQRT_HALF_HI 7.0710678118654757e-1
|
|
72
|
-
#define HIP_SQRT_HALF_LO (-4.8336466567264567e-17)
|
|
73
|
-
#define HIP_THIRD 3.3333333333333333e-1
|
|
74
|
-
#define HIP_TWOTHIRD 6.6666666666666667e-1
|
|
75
|
-
#define HIP_PIO4 7.8539816339744828e-1
|
|
76
|
-
#define HIP_PIO4_HI 7.8539816339744828e-1
|
|
77
|
-
#define HIP_PIO4_LO 3.0616169978683830e-17
|
|
78
|
-
#define HIP_PIO2 1.5707963267948966e+0
|
|
79
|
-
#define HIP_PIO2_HI 1.5707963267948966e+0
|
|
80
|
-
#define HIP_PIO2_LO 6.1232339957367660e-17
|
|
81
|
-
#define HIP_3PIO4 2.3561944901923448e+0
|
|
82
|
-
#define HIP_2_OVER_PI 6.3661977236758138e-1
|
|
83
|
-
#define HIP_PI 3.1415926535897931e+0
|
|
84
|
-
#define HIP_PI_HI 3.1415926535897931e+0
|
|
85
|
-
#define HIP_PI_LO 1.2246467991473532e-16
|
|
86
|
-
#define HIP_SQRT_2PI 2.5066282746310007e+0
|
|
87
|
-
#define HIP_SQRT_2PI_HI 2.5066282746310007e+0
|
|
88
|
-
#define HIP_SQRT_2PI_LO (-1.8328579980459167e-16)
|
|
89
|
-
#define HIP_SQRT_PIO2 1.2533141373155003e+0
|
|
90
|
-
#define HIP_SQRT_PIO2_HI 1.2533141373155003e+0
|
|
91
|
-
#define HIP_SQRT_PIO2_LO (-9.1642899902295834e-17)
|
|
92
|
-
#define HIP_SQRT_2OPI 7.9788456080286536e-1
|
|
93
|
-
#define HIP_L2E 1.4426950408889634e+0
|
|
94
|
-
#define HIP_L2E_HI 1.4426950408889634e+0
|
|
95
|
-
#define HIP_L2E_LO 2.0355273740931033e-17
|
|
96
|
-
#define HIP_L2T 3.3219280948873622e+0
|
|
97
|
-
#define HIP_LG2 3.0102999566398120e-1
|
|
98
|
-
#define HIP_LG2_HI 3.0102999566398120e-1
|
|
99
|
-
#define HIP_LG2_LO (-2.8037281277851704e-18)
|
|
100
|
-
#define HIP_LGE 4.3429448190325182e-1
|
|
101
|
-
#define HIP_LGE_HI 4.3429448190325182e-1
|
|
102
|
-
#define HIP_LGE_LO 1.09831965021676510e-17
|
|
103
|
-
#define HIP_LN2 6.9314718055994529e-1
|
|
104
|
-
#define HIP_LN2_HI 6.9314718055994529e-1
|
|
105
|
-
#define HIP_LN2_LO 2.3190468138462996e-17
|
|
106
|
-
#define HIP_LNT 2.3025850929940459e+0
|
|
107
|
-
#define HIP_LNT_HI 2.3025850929940459e+0
|
|
108
|
-
#define HIP_LNT_LO (-2.1707562233822494e-16)
|
|
109
|
-
#define HIP_LNPI 1.1447298858494002e+0
|
|
110
|
-
#define HIP_LN2_X_1024 7.0978271289338397e+2
|
|
111
|
-
#define HIP_LN2_X_1025 7.1047586007394398e+2
|
|
112
|
-
#define HIP_LN2_X_1075 7.4513321910194122e+2
|
|
113
|
-
#define HIP_LG2_X_1024 3.0825471555991675e+2
|
|
114
|
-
#define HIP_LG2_X_1075 3.2360724533877976e+2
|
|
115
|
-
#define HIP_TWO_TO_23 8388608.0
|
|
116
|
-
#define HIP_TWO_TO_52 4503599627370496.0
|
|
117
|
-
#define HIP_TWO_TO_53 9007199254740992.0
|
|
118
|
-
#define HIP_TWO_TO_54 18014398509481984.0
|
|
119
|
-
#define HIP_TWO_TO_M54 5.5511151231257827e-17
|
|
120
|
-
#define HIP_TWO_TO_M1022 2.22507385850720140e-308
|
|
121
|
-
#define HIP_TRIG_PLOSS 2147483648.0
|
|
122
|
-
#define HIP_DBL2INT_CVT 6755399441055744.0
|
|
123
|
-
|
|
124
|
-
#endif
|
|
@@ -1,405 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
|
3
|
-
|
|
4
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
6
|
-
in the Software without restriction, including without limitation the rights
|
|
7
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
8
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
9
|
-
furnished to do so, subject to the following conditions:
|
|
10
|
-
|
|
11
|
-
The above copyright notice and this permission notice shall be included in
|
|
12
|
-
all copies or substantial portions of the Software.
|
|
13
|
-
|
|
14
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
20
|
-
THE SOFTWARE.
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* @file amd_detail/hip_runtime.h
|
|
25
|
-
* @brief Contains definitions of APIs for HIP runtime.
|
|
26
|
-
*/
|
|
27
|
-
|
|
28
|
-
//#pragma once
|
|
29
|
-
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_H
|
|
30
|
-
#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_H
|
|
31
|
-
|
|
32
|
-
#include <hip/amd_detail/amd_hip_common.h>
|
|
33
|
-
|
|
34
|
-
#if !defined(__HIPCC_RTC__)
|
|
35
|
-
#ifdef __cplusplus
|
|
36
|
-
#include <cstddef>
|
|
37
|
-
#else
|
|
38
|
-
#include <stddef.h>
|
|
39
|
-
#endif // __cplusplus
|
|
40
|
-
#endif // !defined(__HIPCC_RTC__)
|
|
41
|
-
|
|
42
|
-
#ifdef __cplusplus
|
|
43
|
-
extern "C" {
|
|
44
|
-
#endif
|
|
45
|
-
|
|
46
|
-
/**
|
|
47
|
-
* @brief Query the installed library build name.
|
|
48
|
-
*
|
|
49
|
-
* This function can be used even when the library is not initialized.
|
|
50
|
-
*
|
|
51
|
-
* @returns Returns a string describing the build version of the library. The
|
|
52
|
-
* string is owned by the library.
|
|
53
|
-
*/
|
|
54
|
-
const char* amd_dbgapi_get_build_name();
|
|
55
|
-
|
|
56
|
-
/**
|
|
57
|
-
* @brief Query the installed library git hash.
|
|
58
|
-
*
|
|
59
|
-
* This function can be used even when the library is not initialized.
|
|
60
|
-
*
|
|
61
|
-
* @returns Returns git hash of the library.
|
|
62
|
-
*/
|
|
63
|
-
const char* amd_dbgapi_get_git_hash();
|
|
64
|
-
|
|
65
|
-
/**
|
|
66
|
-
* @brief Query the installed library build ID.
|
|
67
|
-
*
|
|
68
|
-
* This function can be used even when the library is not initialized.
|
|
69
|
-
*
|
|
70
|
-
* @returns Returns build ID of the library.
|
|
71
|
-
*/
|
|
72
|
-
size_t amd_dbgapi_get_build_id();
|
|
73
|
-
|
|
74
|
-
#ifdef __cplusplus
|
|
75
|
-
} /* extern "c" */
|
|
76
|
-
#endif
|
|
77
|
-
|
|
78
|
-
//---
|
|
79
|
-
// Top part of file can be compiled with any compiler
|
|
80
|
-
|
|
81
|
-
#if !defined(__HIPCC_RTC__)
|
|
82
|
-
#ifdef __cplusplus
|
|
83
|
-
#include <cmath>
|
|
84
|
-
#include <cstdint>
|
|
85
|
-
#include <tuple>
|
|
86
|
-
#else
|
|
87
|
-
#include <math.h>
|
|
88
|
-
#include <stdint.h>
|
|
89
|
-
#endif // __cplusplus
|
|
90
|
-
#else
|
|
91
|
-
#if !__HIP_NO_STD_DEFS__
|
|
92
|
-
typedef unsigned int uint32_t;
|
|
93
|
-
typedef unsigned long long uint64_t;
|
|
94
|
-
typedef signed int int32_t;
|
|
95
|
-
typedef signed long long int64_t;
|
|
96
|
-
namespace std {
|
|
97
|
-
using ::uint32_t;
|
|
98
|
-
using ::uint64_t;
|
|
99
|
-
using ::int32_t;
|
|
100
|
-
using ::int64_t;
|
|
101
|
-
}
|
|
102
|
-
#endif // __HIP_NO_STD_DEFS__
|
|
103
|
-
#endif // !defined(__HIPCC_RTC__)
|
|
104
|
-
|
|
105
|
-
#if __HIP_CLANG_ONLY__
|
|
106
|
-
|
|
107
|
-
#if !defined(__align__)
|
|
108
|
-
#define __align__(x) __attribute__((aligned(x)))
|
|
109
|
-
#endif
|
|
110
|
-
|
|
111
|
-
#define CUDA_SUCCESS hipSuccess
|
|
112
|
-
|
|
113
|
-
#if !defined(__HIPCC_RTC__)
|
|
114
|
-
#include <hip/hip_runtime_api.h>
|
|
115
|
-
#include <hip/amd_detail/amd_hip_atomic.h>
|
|
116
|
-
#include <hip/amd_detail/amd_device_functions.h>
|
|
117
|
-
#include <hip/amd_detail/amd_surface_functions.h>
|
|
118
|
-
#include <hip/amd_detail/texture_fetch_functions.h>
|
|
119
|
-
#include <hip/amd_detail/texture_indirect_functions.h>
|
|
120
|
-
extern int HIP_TRACE_API;
|
|
121
|
-
#endif // !defined(__HIPCC_RTC__)
|
|
122
|
-
|
|
123
|
-
#ifdef __cplusplus
|
|
124
|
-
#include <hip/amd_detail/hip_ldg.h>
|
|
125
|
-
#endif
|
|
126
|
-
|
|
127
|
-
#include <hip/amd_detail/host_defines.h>
|
|
128
|
-
|
|
129
|
-
// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
|
|
130
|
-
#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
|
|
131
|
-
#define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
|
|
132
|
-
#endif
|
|
133
|
-
|
|
134
|
-
// Feature tests:
|
|
135
|
-
#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
|
|
136
|
-
// Device compile and not host compile:
|
|
137
|
-
|
|
138
|
-
// 32-bit Atomics:
|
|
139
|
-
#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
|
|
140
|
-
#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
|
|
141
|
-
#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
|
|
142
|
-
#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
|
|
143
|
-
#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)
|
|
144
|
-
|
|
145
|
-
// 64-bit Atomics:
|
|
146
|
-
#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
|
|
147
|
-
#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (1)
|
|
148
|
-
|
|
149
|
-
// Doubles
|
|
150
|
-
#define __HIP_ARCH_HAS_DOUBLES__ (1)
|
|
151
|
-
|
|
152
|
-
// warp cross-lane operations:
|
|
153
|
-
#define __HIP_ARCH_HAS_WARP_VOTE__ (1)
|
|
154
|
-
#define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
|
|
155
|
-
#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
|
|
156
|
-
#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
|
|
157
|
-
|
|
158
|
-
// sync
|
|
159
|
-
#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1)
|
|
160
|
-
#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
|
|
161
|
-
|
|
162
|
-
// misc
|
|
163
|
-
#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
|
|
164
|
-
#define __HIP_ARCH_HAS_3DGRID__ (1)
|
|
165
|
-
#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
|
|
166
|
-
|
|
167
|
-
#endif /* Device feature flags */
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
#define launch_bounds_impl0(requiredMaxThreadsPerBlock) \
|
|
171
|
-
__attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))
|
|
172
|
-
#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \
|
|
173
|
-
__attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \
|
|
174
|
-
amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))
|
|
175
|
-
#define select_impl_(_1, _2, impl_, ...) impl_
|
|
176
|
-
#define __launch_bounds__(...) \
|
|
177
|
-
select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0, )(__VA_ARGS__)
|
|
178
|
-
|
|
179
|
-
#if !defined(__HIPCC_RTC__)
|
|
180
|
-
__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
|
|
181
|
-
#endif // !defined(__HIPCC_RTC__)
|
|
182
|
-
|
|
183
|
-
// End doxygen API:
|
|
184
|
-
/**
|
|
185
|
-
* @}
|
|
186
|
-
*/
|
|
187
|
-
|
|
188
|
-
//
|
|
189
|
-
// hip-clang functions
|
|
190
|
-
//
|
|
191
|
-
#if !defined(__HIPCC_RTC__)
|
|
192
|
-
#define HIP_KERNEL_NAME(...) __VA_ARGS__
|
|
193
|
-
#define HIP_SYMBOL(X) X
|
|
194
|
-
|
|
195
|
-
typedef int hipLaunchParm;
|
|
196
|
-
|
|
197
|
-
template <std::size_t n, typename... Ts,
|
|
198
|
-
typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
|
|
199
|
-
void pArgs(const std::tuple<Ts...>&, void*) {}
|
|
200
|
-
|
|
201
|
-
template <std::size_t n, typename... Ts,
|
|
202
|
-
typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
|
|
203
|
-
void pArgs(const std::tuple<Ts...>& formals, void** _vargs) {
|
|
204
|
-
using T = typename std::tuple_element<n, std::tuple<Ts...> >::type;
|
|
205
|
-
|
|
206
|
-
static_assert(!std::is_reference<T>{},
|
|
207
|
-
"A __global__ function cannot have a reference as one of its "
|
|
208
|
-
"arguments.");
|
|
209
|
-
#if defined(HIP_STRICT)
|
|
210
|
-
static_assert(std::is_trivially_copyable<T>{},
|
|
211
|
-
"Only TriviallyCopyable types can be arguments to a __global__ "
|
|
212
|
-
"function");
|
|
213
|
-
#endif
|
|
214
|
-
_vargs[n] = const_cast<void*>(reinterpret_cast<const void*>(&std::get<n>(formals)));
|
|
215
|
-
return pArgs<n + 1>(formals, _vargs);
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
template <typename... Formals, typename... Actuals>
|
|
219
|
-
std::tuple<Formals...> validateArgsCountType(void (*kernel)(Formals...), std::tuple<Actuals...>(actuals)) {
|
|
220
|
-
static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch");
|
|
221
|
-
std::tuple<Formals...> to_formals{std::move(actuals)};
|
|
222
|
-
return to_formals;
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
#if defined(HIP_TEMPLATE_KERNEL_LAUNCH)
|
|
226
|
-
template <typename... Args, typename F = void (*)(Args...)>
|
|
227
|
-
void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
|
228
|
-
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) {
|
|
229
|
-
constexpr size_t count = sizeof...(Args);
|
|
230
|
-
auto tup_ = std::tuple<Args...>{args...};
|
|
231
|
-
auto tup = validateArgsCountType(kernel, tup_);
|
|
232
|
-
void* _Args[count];
|
|
233
|
-
pArgs<0>(tup, _Args);
|
|
234
|
-
|
|
235
|
-
auto k = reinterpret_cast<void*>(kernel);
|
|
236
|
-
hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream);
|
|
237
|
-
}
|
|
238
|
-
#else
|
|
239
|
-
#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \
|
|
240
|
-
do { \
|
|
241
|
-
kernelName<<<(numBlocks), (numThreads), (memPerBlock), (streamId)>>>(__VA_ARGS__); \
|
|
242
|
-
} while (0)
|
|
243
|
-
|
|
244
|
-
#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
|
|
245
|
-
#endif
|
|
246
|
-
|
|
247
|
-
#include <hip/hip_runtime_api.h>
|
|
248
|
-
#endif // !defined(__HIPCC_RTC__)
|
|
249
|
-
|
|
250
|
-
#if defined(__HIPCC_RTC__)
|
|
251
|
-
typedef struct dim3 {
|
|
252
|
-
uint32_t x; ///< x
|
|
253
|
-
uint32_t y; ///< y
|
|
254
|
-
uint32_t z; ///< z
|
|
255
|
-
#ifdef __cplusplus
|
|
256
|
-
constexpr __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
|
|
257
|
-
#endif
|
|
258
|
-
} dim3;
|
|
259
|
-
#endif // !defined(__HIPCC_RTC__)
|
|
260
|
-
|
|
261
|
-
#pragma push_macro("__DEVICE__")
|
|
262
|
-
#define __DEVICE__ static __device__ __forceinline__
|
|
263
|
-
|
|
264
|
-
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(unsigned int);
|
|
265
|
-
__DEVICE__ unsigned int __hip_get_thread_idx_x() { return __ockl_get_local_id(0); }
|
|
266
|
-
__DEVICE__ unsigned int __hip_get_thread_idx_y() { return __ockl_get_local_id(1); }
|
|
267
|
-
__DEVICE__ unsigned int __hip_get_thread_idx_z() { return __ockl_get_local_id(2); }
|
|
268
|
-
|
|
269
|
-
extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(unsigned int);
|
|
270
|
-
__DEVICE__ unsigned int __hip_get_block_idx_x() { return __ockl_get_group_id(0); }
|
|
271
|
-
__DEVICE__ unsigned int __hip_get_block_idx_y() { return __ockl_get_group_id(1); }
|
|
272
|
-
__DEVICE__ unsigned int __hip_get_block_idx_z() { return __ockl_get_group_id(2); }
|
|
273
|
-
|
|
274
|
-
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(unsigned int);
|
|
275
|
-
__DEVICE__ unsigned int __hip_get_block_dim_x() { return __ockl_get_local_size(0); }
|
|
276
|
-
__DEVICE__ unsigned int __hip_get_block_dim_y() { return __ockl_get_local_size(1); }
|
|
277
|
-
__DEVICE__ unsigned int __hip_get_block_dim_z() { return __ockl_get_local_size(2); }
|
|
278
|
-
|
|
279
|
-
extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(unsigned int);
|
|
280
|
-
__DEVICE__ unsigned int __hip_get_grid_dim_x() { return __ockl_get_num_groups(0); }
|
|
281
|
-
__DEVICE__ unsigned int __hip_get_grid_dim_y() { return __ockl_get_num_groups(1); }
|
|
282
|
-
__DEVICE__ unsigned int __hip_get_grid_dim_z() { return __ockl_get_num_groups(2); }
|
|
283
|
-
|
|
284
|
-
#define __HIP_DEVICE_BUILTIN(DIMENSION, FUNCTION) \
|
|
285
|
-
__declspec(property(get = __get_##DIMENSION)) unsigned int DIMENSION; \
|
|
286
|
-
__DEVICE__ unsigned int __get_##DIMENSION(void) { \
|
|
287
|
-
return FUNCTION; \
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
// Type of the builtin `threadIdx` object. x, y and z are not stored fields:
// each is a property generated by __HIP_DEVICE_BUILTIN whose getter calls
// __hip_get_thread_idx_x/y/z(). When compiled as C++, the struct also
// converts implicitly to dim3 by reading the three properties.
struct __hip_builtin_threadIdx_t {
|
|
291
|
-
__HIP_DEVICE_BUILTIN(x,__hip_get_thread_idx_x());
|
|
292
|
-
__HIP_DEVICE_BUILTIN(y,__hip_get_thread_idx_y());
|
|
293
|
-
__HIP_DEVICE_BUILTIN(z,__hip_get_thread_idx_z());
|
|
294
|
-
#ifdef __cplusplus
|
|
295
|
-
__device__ operator dim3() const { return dim3(x, y, z); }
|
|
296
|
-
#endif
|
|
297
|
-
};
|
|
298
|
-
|
|
299
|
-
// Type of the builtin `blockIdx` object. x, y and z are not stored fields:
// each is a property generated by __HIP_DEVICE_BUILTIN whose getter calls
// __hip_get_block_idx_x/y/z(). When compiled as C++, the struct also
// converts implicitly to dim3 by reading the three properties.
struct __hip_builtin_blockIdx_t {
|
|
300
|
-
__HIP_DEVICE_BUILTIN(x,__hip_get_block_idx_x());
|
|
301
|
-
__HIP_DEVICE_BUILTIN(y,__hip_get_block_idx_y());
|
|
302
|
-
__HIP_DEVICE_BUILTIN(z,__hip_get_block_idx_z());
|
|
303
|
-
#ifdef __cplusplus
|
|
304
|
-
__device__ operator dim3() const { return dim3(x, y, z); }
|
|
305
|
-
#endif
|
|
306
|
-
};
|
|
307
|
-
|
|
308
|
-
// Type of the builtin `blockDim` object. x, y and z are not stored fields:
// each is a property generated by __HIP_DEVICE_BUILTIN whose getter calls
// __hip_get_block_dim_x/y/z(). When compiled as C++, the struct also
// converts implicitly to dim3 by reading the three properties.
struct __hip_builtin_blockDim_t {
|
|
309
|
-
__HIP_DEVICE_BUILTIN(x,__hip_get_block_dim_x());
|
|
310
|
-
__HIP_DEVICE_BUILTIN(y,__hip_get_block_dim_y());
|
|
311
|
-
__HIP_DEVICE_BUILTIN(z,__hip_get_block_dim_z());
|
|
312
|
-
#ifdef __cplusplus
|
|
313
|
-
__device__ operator dim3() const { return dim3(x, y, z); }
|
|
314
|
-
#endif
|
|
315
|
-
};
|
|
316
|
-
|
|
317
|
-
// Type of the builtin `gridDim` object. x, y and z are not stored fields:
// each is a property generated by __HIP_DEVICE_BUILTIN whose getter calls
// __hip_get_grid_dim_x/y/z(). When compiled as C++, the struct also
// converts implicitly to dim3 by reading the three properties.
struct __hip_builtin_gridDim_t {
|
|
318
|
-
__HIP_DEVICE_BUILTIN(x,__hip_get_grid_dim_x());
|
|
319
|
-
__HIP_DEVICE_BUILTIN(y,__hip_get_grid_dim_y());
|
|
320
|
-
__HIP_DEVICE_BUILTIN(z,__hip_get_grid_dim_z());
|
|
321
|
-
#ifdef __cplusplus
|
|
322
|
-
__device__ operator dim3() const { return dim3(x, y, z); }
|
|
323
|
-
#endif
|
|
324
|
-
};
|
|
325
|
-
|
|
326
|
-
// __HIP_DEVICE_BUILTIN is only a local helper for the builtin structs, so it
// is removed here. pop_macro then restores the definition of __DEVICE__ that
// was saved by a matching #pragma push_macro (presumably earlier in this
// header -- confirm).
#undef __HIP_DEVICE_BUILTIN
|
|
327
|
-
#pragma pop_macro("__DEVICE__")
|
|
328
|
-
|
|
329
|
-
// The builtin coordinate objects. Each is declared (not defined here) as a
// const, weak, device-side object of the matching property struct type, so
// an expression such as `threadIdx.x` resolves to that struct's getter.
extern const __device__ __attribute__((weak)) __hip_builtin_threadIdx_t threadIdx;
|
|
330
|
-
extern const __device__ __attribute__((weak)) __hip_builtin_blockIdx_t blockIdx;
|
|
331
|
-
extern const __device__ __attribute__((weak)) __hip_builtin_blockDim_t blockDim;
|
|
332
|
-
extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim;
|
|
333
|
-
|
|
334
|
-
// Alternative spellings: hipThreadIdx_x/y/z expand textually to threadIdx.x/y/z.
#define hipThreadIdx_x threadIdx.x
|
|
335
|
-
#define hipThreadIdx_y threadIdx.y
|
|
336
|
-
#define hipThreadIdx_z threadIdx.z
|
|
337
|
-
|
|
338
|
-
// Alternative spellings: hipBlockIdx_x/y/z expand textually to blockIdx.x/y/z.
#define hipBlockIdx_x blockIdx.x
|
|
339
|
-
#define hipBlockIdx_y blockIdx.y
|
|
340
|
-
#define hipBlockIdx_z blockIdx.z
|
|
341
|
-
|
|
342
|
-
// Alternative spellings: hipBlockDim_x/y/z expand textually to blockDim.x/y/z.
#define hipBlockDim_x blockDim.x
|
|
343
|
-
#define hipBlockDim_y blockDim.y
|
|
344
|
-
#define hipBlockDim_z blockDim.z
|
|
345
|
-
|
|
346
|
-
// Alternative spellings: hipGridDim_x/y/z expand textually to gridDim.x/y/z.
#define hipGridDim_x gridDim.x
|
|
347
|
-
#define hipGridDim_y gridDim.y
|
|
348
|
-
#define hipGridDim_z gridDim.z
|
|
349
|
-
|
|
350
|
-
#if !defined(__HIPCC_RTC__)
|
|
351
|
-
#include <hip/amd_detail/amd_math_functions.h>
|
|
352
|
-
#endif
|
|
353
|
-
|
|
354
|
-
#if __HIP_HCC_COMPAT_MODE__
|
|
355
|
-
// Define HCC work item functions in terms of HIP builtin variables.
|
|
356
|
-
// Save any existing definition of __DEFINE_HCC_FUNC, then define a generator
// for `hc_get_<hc_fun>(unsigned int i)`: an always-inline device function that
// returns hip_var.x when i == 0, hip_var.y when i == 1, and hip_var.z for
// every other value of i (there is no range check on i).
// NOTE: the body is one logical line joined by backslashes; do not insert
// comments between the continuation lines.
#pragma push_macro("__DEFINE_HCC_FUNC")
|
|
357
|
-
#define __DEFINE_HCC_FUNC(hc_fun,hip_var) \
|
|
358
|
-
inline __device__ __attribute__((always_inline)) unsigned int hc_get_##hc_fun(unsigned int i) { \
|
|
359
|
-
if (i==0) \
|
|
360
|
-
return hip_var.x; \
|
|
361
|
-
else if(i==1) \
|
|
362
|
-
return hip_var.y; \
|
|
363
|
-
else \
|
|
364
|
-
return hip_var.z; \
|
|
365
|
-
}
|
|
366
|
-
|
|
367
|
-
// Instantiate the HCC-style accessors from the HIP builtin objects:
//   hc_get_workitem_id(i) reads threadIdx
//   hc_get_group_id(i)    reads blockIdx
//   hc_get_group_size(i)  reads blockDim
//   hc_get_num_groups(i)  reads gridDim
// The final pop_macro restores the __DEFINE_HCC_FUNC definition saved by the
// preceding push_macro.
__DEFINE_HCC_FUNC(workitem_id, threadIdx)
|
|
368
|
-
__DEFINE_HCC_FUNC(group_id, blockIdx)
|
|
369
|
-
__DEFINE_HCC_FUNC(group_size, blockDim)
|
|
370
|
-
__DEFINE_HCC_FUNC(num_groups, gridDim)
|
|
371
|
-
#pragma pop_macro("__DEFINE_HCC_FUNC")
|
|
372
|
-
|
|
373
|
-
// __ockl_get_global_id is only declared here (C linkage, device-side,
// __attribute__((const))); it takes an unsigned int dimension and returns
// size_t.
// hc_get_workitem_absolute_id(dim) returns __ockl_get_global_id(dim)
// explicitly cast to unsigned int. `dim` is a signed int in this signature
// and is implicitly converted to the callee's unsigned int parameter.
extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(unsigned int);
|
|
374
|
-
inline __device__ __attribute__((always_inline)) unsigned int
|
|
375
|
-
hc_get_workitem_absolute_id(int dim)
|
|
376
|
-
{
|
|
377
|
-
return (unsigned int)__ockl_get_global_id(dim);
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
#endif
|
|
381
|
-
|
|
382
|
-
#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
|
|
383
|
-
#if !defined(__HIPCC_RTC__)
|
|
384
|
-
// Support std::complex.
|
|
385
|
-
#if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
|
|
386
|
-
#pragma push_macro("__CUDA__")
|
|
387
|
-
#define __CUDA__
|
|
388
|
-
#include <__clang_cuda_math_forward_declares.h>
|
|
389
|
-
#include <__clang_cuda_complex_builtins.h>
|
|
390
|
-
// Workaround for using libc++ with HIP-Clang.
|
|
391
|
-
// The following headers require the clang include path before the standard C++ include path.
|
|
392
|
-
// However, the libc++ include path must come before the clang include path.
|
|
393
|
-
// To work around this, we pass -isystem with the parent directory of clang include
|
|
394
|
-
// path instead of the clang include path itself.
|
|
395
|
-
#include <include/cuda_wrappers/algorithm>
|
|
396
|
-
#include <include/cuda_wrappers/complex>
|
|
397
|
-
#include <include/cuda_wrappers/new>
|
|
398
|
-
#undef __CUDA__
|
|
399
|
-
#pragma pop_macro("__CUDA__")
|
|
400
|
-
#endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
|
|
401
|
-
#endif // !defined(__HIPCC_RTC__)
|
|
402
|
-
#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
|
|
403
|
-
#endif // __HIP_CLANG_ONLY__
|
|
404
|
-
|
|
405
|
-
#endif // HIP_AMD_DETAIL_RUNTIME_H
|