triton-windows 3.3.1.post19__cp312-cp312-win_amd64.whl → 3.3.1.post21__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic.
- triton/_C/libtriton.pyd +0 -0
- triton/backends/amd/driver.py +6 -1
- triton/backends/nvidia/compiler.py +1 -3
- triton/backends/nvidia/driver.py +7 -3
- triton/runtime/autotuner.py +2 -2
- triton/runtime/build.py +5 -5
- triton/windows_utils.py +11 -4
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/METADATA +1 -1
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/RECORD +11 -108
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/WHEEL +0 -0
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/top_level.txt +0 -0
@@ -1,1638 +0,0 @@
Entire file removed: triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h (1638 lines). The removed header is AMD's MIT-licensed HIP device-atomics implementation: overloads of atomicCAS, atomicAdd (plus the deprecated atomicAddNoRet), atomicSub, atomicExch, atomicMin, atomicMax, atomicInc, atomicDec, atomicAnd, atomicOr and atomicXor, together with their *_system variants, built on the __hip_atomic_* compiler builtins with __ATOMIC_RELAXED ordering and agent or system memory scope. Floating-point add/min/max can route through the unsafeAtomic* paths when __AMDGCN_UNSAFE_FP_ATOMICS__ is defined, and on gfx941 targets the fetch-style builtins are replaced by compare-and-swap retry loops via the hip_cas_expander and hip_cas_extrema_expander helper templates.
|
|
1264
|
-
address, val, [](int& x, int y) { x ^= y; }, [=]() {
|
|
1265
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
|
1266
|
-
__HIP_MEMORY_SCOPE_SYSTEM);
|
|
1267
|
-
});
|
|
1268
|
-
#else
|
|
1269
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
|
1270
|
-
#endif // __gfx941__
|
|
1271
|
-
}
|
|
1272
|
-
|
|
1273
|
-
__device__
|
|
1274
|
-
inline
|
|
1275
|
-
unsigned int atomicXor(unsigned int* address, unsigned int val) {
|
|
1276
|
-
#if defined(__gfx941__)
|
|
1277
|
-
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
|
1278
|
-
address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
|
|
1279
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
|
1280
|
-
__HIP_MEMORY_SCOPE_AGENT);
|
|
1281
|
-
});
|
|
1282
|
-
#else
|
|
1283
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
|
1284
|
-
#endif // __gfx941__
|
|
1285
|
-
}
|
|
1286
|
-
|
|
1287
|
-
__device__
|
|
1288
|
-
inline
|
|
1289
|
-
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
|
|
1290
|
-
#if defined(__gfx941__)
|
|
1291
|
-
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
|
1292
|
-
address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
|
|
1293
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
|
1294
|
-
__HIP_MEMORY_SCOPE_SYSTEM);
|
|
1295
|
-
});
|
|
1296
|
-
#else
|
|
1297
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
|
1298
|
-
#endif // __gfx941__
|
|
1299
|
-
}
|
|
1300
|
-
|
|
1301
|
-
__device__
|
|
1302
|
-
inline
|
|
1303
|
-
unsigned long atomicXor(unsigned long* address, unsigned long val) {
|
|
1304
|
-
#if defined(__gfx941__)
|
|
1305
|
-
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
|
1306
|
-
address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
|
|
1307
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
|
1308
|
-
__HIP_MEMORY_SCOPE_AGENT);
|
|
1309
|
-
});
|
|
1310
|
-
#else
|
|
1311
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
|
1312
|
-
#endif // __gfx941__
|
|
1313
|
-
}
|
|
1314
|
-
|
|
1315
|
-
__device__
|
|
1316
|
-
inline
|
|
1317
|
-
unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
|
|
1318
|
-
#if defined(__gfx941__)
|
|
1319
|
-
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
|
1320
|
-
address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
|
|
1321
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
|
1322
|
-
__HIP_MEMORY_SCOPE_SYSTEM);
|
|
1323
|
-
});
|
|
1324
|
-
#else
|
|
1325
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
|
1326
|
-
#endif // __gfx941__
|
|
1327
|
-
}
|
|
1328
|
-
|
|
1329
|
-
__device__
|
|
1330
|
-
inline
|
|
1331
|
-
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
|
|
1332
|
-
#if defined(__gfx941__)
|
|
1333
|
-
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
|
1334
|
-
address,
|
|
1335
|
-
val,
|
|
1336
|
-
[](unsigned long long& x, unsigned long long y) { x ^= y; },
|
|
1337
|
-
[=]() {
|
|
1338
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
|
1339
|
-
__HIP_MEMORY_SCOPE_AGENT);
|
|
1340
|
-
});
|
|
1341
|
-
#else
|
|
1342
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
|
1343
|
-
#endif // __gfx941__
|
|
1344
|
-
}
|
|
1345
|
-
|
|
1346
|
-
__device__
|
|
1347
|
-
inline
|
|
1348
|
-
unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) {
|
|
1349
|
-
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
|
1350
|
-
}
|
|
1351
|
-
|
|
1352
|
-
-#else // __hip_atomic_compare_exchange_strong
-
-__device__
-inline
-int atomicCAS(int* address, int compare, int val)
-{
-    __atomic_compare_exchange_n(
-        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-
-    return compare;
-}
-__device__
-inline
-unsigned int atomicCAS(
-    unsigned int* address, unsigned int compare, unsigned int val)
-{
-    __atomic_compare_exchange_n(
-        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-
-    return compare;
-}
-__device__
-inline
-unsigned long long atomicCAS(
-    unsigned long long* address,
-    unsigned long long compare,
-    unsigned long long val)
-{
-    __atomic_compare_exchange_n(
-        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-
-    return compare;
-}
-
-__device__
-inline
-int atomicAdd(int* address, int val)
-{
-    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicAdd(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicAdd(
-    unsigned long long* address, unsigned long long val)
-{
-    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-float atomicAdd(float* address, float val)
-{
-#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-    return unsafeAtomicAdd(address, val);
-#else
-    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-#endif
-}
-
-#if !defined(__HIPCC_RTC__)
-DEPRECATED("use atomicAdd instead")
-#endif // !defined(__HIPCC_RTC__)
-__device__
-inline
-void atomicAddNoRet(float* address, float val)
-{
-    __ockl_atomic_add_noret_f32(address, val);
-}
-
-__device__
-inline
-double atomicAdd(double* address, double val)
-{
-#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-    return unsafeAtomicAdd(address, val);
-#else
-    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-#endif
-}
-
-__device__
-inline
-int atomicSub(int* address, int val)
-{
-    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicSub(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
-}
-
-__device__
-inline
-int atomicExch(int* address, int val)
-{
-    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicExch(unsigned int* address, unsigned int val)
-{
-    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicExch(unsigned long long* address, unsigned long long val)
-{
-    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-float atomicExch(float* address, float val)
-{
-    return __uint_as_float(__atomic_exchange_n(
-        reinterpret_cast<unsigned int*>(address),
-        __float_as_uint(val),
-        __ATOMIC_RELAXED));
-}
-
-__device__
-inline
-int atomicMin(int* address, int val)
-{
-    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicMin(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicMin(
-    unsigned long long* address, unsigned long long val)
-{
-    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
-    while (val < tmp) {
-        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
-
-        if (tmp1 != tmp) { tmp = tmp1; continue; }
-
-        tmp = atomicCAS(address, tmp, val);
-    }
-
-    return tmp;
-}
-__device__ inline long long atomicMin(long long* address, long long val) {
-    long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
-    while (val < tmp) {
-        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
-
-        if (tmp1 != tmp) {
-            tmp = tmp1;
-            continue;
-        }
-
-        tmp = atomicCAS(address, tmp, val);
-    }
-    return tmp;
-}
-
-__device__
-inline
-int atomicMax(int* address, int val)
-{
-    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicMax(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicMax(
-    unsigned long long* address, unsigned long long val)
-{
-    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
-    while (tmp < val) {
-        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
-
-        if (tmp1 != tmp) { tmp = tmp1; continue; }
-
-        tmp = atomicCAS(address, tmp, val);
-    }
-
-    return tmp;
-}
-__device__ inline long long atomicMax(long long* address, long long val) {
-    long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
-    while (tmp < val) {
-        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
-
-        if (tmp1 != tmp) {
-            tmp = tmp1;
-            continue;
-        }
-
-        tmp = atomicCAS(address, tmp, val);
-    }
-    return tmp;
-}
-
-__device__
-inline
-unsigned int atomicInc(unsigned int* address, unsigned int val)
-{
-    return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
-}
-
-__device__
-inline
-unsigned int atomicDec(unsigned int* address, unsigned int val)
-{
-    return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
-}
-
-__device__
-inline
-int atomicAnd(int* address, int val)
-{
-    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicAnd(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicAnd(
-    unsigned long long* address, unsigned long long val)
-{
-    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-}
-
-__device__
-inline
-int atomicOr(int* address, int val)
-{
-    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicOr(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicOr(
-    unsigned long long* address, unsigned long long val)
-{
-    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-}
-
-__device__
-inline
-int atomicXor(int* address, int val)
-{
-    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicXor(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicXor(
-    unsigned long long* address, unsigned long long val)
-{
-    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-}
-
-#endif // __hip_atomic_compare_exchange_strong
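Note on the removed fallback branch above (the path taken when `__hip_atomic_compare_exchange_strong` is not available): the 64-bit `atomicMin`/`atomicMax` overloads are built from a compare-and-swap retry loop rather than a single fetch builtin. A minimal sketch of that retry pattern, written here with standard C++ `std::atomic` purely as a stand-in for the device builtins (the function name `atomic_min_cas` and the use of `std::atomic` are illustrative assumptions, not part of the removed header):

```cpp
#include <atomic>
#include <cstdint>

// Sketch of the CAS retry loop used by the removed 64-bit atomicMin/atomicMax
// fallbacks, expressed with std::atomic for illustration only.
inline uint64_t atomic_min_cas(std::atomic<uint64_t>& a, uint64_t val) {
    uint64_t observed = a.load(std::memory_order_relaxed);
    // Only attempt the swap while our value would actually lower the stored minimum.
    while (val < observed) {
        // On failure, compare_exchange_weak reloads `observed` with the current
        // value, so the loop condition is re-checked against fresh data.
        if (a.compare_exchange_weak(observed, val, std::memory_order_relaxed)) {
            break;  // installed `val`; `observed` still holds the prior minimum
        }
    }
    return observed;  // value seen before any update, matching atomicMin's contract
}
```

The device code in the listing reaches the same result with `__atomic_load_n` plus the `atomicCAS` overloads defined earlier in the same branch.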