triton-windows 3.2.0.post19__cp313-cp313-win_amd64.whl → 3.2.0.post21__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (110)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/backends/amd/driver.py +6 -1
  3. triton/backends/nvidia/compiler.py +1 -3
  4. triton/backends/nvidia/driver.py +7 -3
  5. triton/runtime/autotuner.py +2 -2
  6. triton/runtime/build.py +5 -5
  7. triton/runtime/tcc/lib/python310.def +1610 -0
  8. triton/runtime/tcc/lib/python311.def +1633 -0
  9. triton/runtime/tcc/lib/python312.def +1703 -0
  10. triton/runtime/tcc/lib/python313.def +1651 -0
  11. triton/runtime/tcc/lib/python313t.def +1656 -0
  12. triton/runtime/tcc/lib/python39.def +1644 -0
  13. triton/runtime/tcc/lib/python3t.def +905 -0
  14. triton/windows_utils.py +11 -4
  15. {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/METADATA +1 -1
  16. {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/RECORD +18 -103
  17. {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/WHEEL +1 -1
  18. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  19. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1031
  20. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1612
  21. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1337
  22. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  23. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  24. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  25. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -829
  26. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  27. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  28. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  29. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  30. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  31. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  32. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  33. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  34. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  35. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -494
  36. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  37. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  38. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  39. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  40. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  41. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  42. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  43. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1350
  44. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  45. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  46. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  47. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  48. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  49. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10169
  50. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -77
  51. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -180
  52. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  53. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  54. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  55. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  56. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  57. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  58. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  59. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  60. triton/backends/amd/include/hip/device_functions.h +0 -38
  61. triton/backends/amd/include/hip/driver_types.h +0 -468
  62. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  63. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  64. triton/backends/amd/include/hip/hip_common.h +0 -100
  65. triton/backends/amd/include/hip/hip_complex.h +0 -38
  66. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  67. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  68. triton/backends/amd/include/hip/hip_ext.h +0 -159
  69. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  70. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  71. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  72. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  73. triton/backends/amd/include/hip/hip_profile.h +0 -27
  74. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  75. triton/backends/amd/include/hip/hip_runtime_api.h +0 -8919
  76. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  77. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  78. triton/backends/amd/include/hip/hip_version.h +0 -17
  79. triton/backends/amd/include/hip/hiprtc.h +0 -421
  80. triton/backends/amd/include/hip/library_types.h +0 -78
  81. triton/backends/amd/include/hip/math_functions.h +0 -42
  82. triton/backends/amd/include/hip/surface_types.h +0 -63
  83. triton/backends/amd/include/hip/texture_types.h +0 -194
  84. triton/backends/amd/include/hsa/Brig.h +0 -1131
  85. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  86. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -436
  87. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  88. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  89. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  90. triton/backends/amd/include/hsa/hsa.h +0 -5729
  91. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  92. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -566
  93. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3090
  94. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  95. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  96. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  97. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  98. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  99. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4435
  100. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1467
  101. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3027
  102. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  103. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  104. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  105. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  106. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  107. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  108. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  109. triton/backends/amd/include/roctracer/roctx.h +0 -229
  110. {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/top_level.txt +0 -0
@@ -1,1612 +0,0 @@
1
- /*
2
- Copyright (c) 2015 - Present Advanced Micro Devices, Inc. All rights reserved.
3
-
4
- Permission is hereby granted, free of charge, to any person obtaining a copy
5
- of this software and associated documentation files (the "Software"), to deal
6
- in the Software without restriction, including without limitation the rights
7
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
- copies of the Software, and to permit persons to whom the Software is
9
- furnished to do so, subject to the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be included in
12
- all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
- THE SOFTWARE.
21
- */
22
-
23
- #pragma once
24
-
25
- #if !defined(__HIPCC_RTC__)
26
- #include "amd_device_functions.h"
27
- #endif
28
-
29
- #if __has_builtin(__hip_atomic_compare_exchange_strong)
30
-
31
- template<bool B, typename T, typename F> struct Cond_t;
32
-
33
- template<typename T, typename F> struct Cond_t<true, T, F> { using type = T; };
34
- template<typename T, typename F> struct Cond_t<false, T, F> { using type = F; };
35
-
36
- #if !__HIP_DEVICE_COMPILE__
37
- //TODO: Remove this after compiler pre-defines the following Macros.
38
- #define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
39
- #define __HIP_MEMORY_SCOPE_WAVEFRONT 2
40
- #define __HIP_MEMORY_SCOPE_WORKGROUP 3
41
- #define __HIP_MEMORY_SCOPE_AGENT 4
42
- #define __HIP_MEMORY_SCOPE_SYSTEM 5
43
- #endif
44
-
45
- #if !defined(__HIPCC_RTC__)
46
- #include "amd_hip_unsafe_atomics.h"
47
- #endif
48
-
49
- // Atomic expanders
50
- template<
51
- int mem_order = __ATOMIC_SEQ_CST,
52
- int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
53
- typename T,
54
- typename Op,
55
- typename F>
56
- inline
57
- __attribute__((always_inline, device))
58
- T hip_cas_expander(T* p, T x, Op op, F f) noexcept
59
- {
60
- using FP = __attribute__((address_space(0))) const void*;
61
-
62
- __device__
63
- extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
64
-
65
- if (is_shared_workaround((FP)p))
66
- return f();
67
-
68
- using U = typename Cond_t<
69
- sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;
70
-
71
- auto q = reinterpret_cast<U*>(p);
72
-
73
- U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
74
- U tmp1;
75
- do {
76
- tmp1 = tmp0;
77
-
78
- op(reinterpret_cast<T&>(tmp1), x);
79
- } while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
80
- mem_order, mem_scope));
81
-
82
- return reinterpret_cast<const T&>(tmp0);
83
- }
84
-
85
- template<
86
- int mem_order = __ATOMIC_SEQ_CST,
87
- int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
88
- typename T,
89
- typename Cmp,
90
- typename F>
91
- inline
92
- __attribute__((always_inline, device))
93
- T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept
94
- {
95
- using FP = __attribute__((address_space(0))) const void*;
96
-
97
- __device__
98
- extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
99
-
100
- if (is_shared_workaround((FP)p))
101
- return f();
102
-
103
- using U = typename Cond_t<
104
- sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;
105
-
106
- auto q = reinterpret_cast<U*>(p);
107
-
108
- U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
109
- while (cmp(x, reinterpret_cast<const T&>(tmp)) &&
110
- !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
111
- mem_scope));
112
-
113
- return reinterpret_cast<const T&>(tmp);
114
- }
115
-
116
- __device__
117
- inline
118
- int atomicCAS(int* address, int compare, int val) {
119
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
120
- __HIP_MEMORY_SCOPE_AGENT);
121
- return compare;
122
- }
123
-
124
- __device__
125
- inline
126
- int atomicCAS_system(int* address, int compare, int val) {
127
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
128
- __HIP_MEMORY_SCOPE_SYSTEM);
129
- return compare;
130
- }
131
-
132
- __device__
133
- inline
134
- unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
135
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
136
- __HIP_MEMORY_SCOPE_AGENT);
137
- return compare;
138
- }
139
-
140
- __device__
141
- inline
142
- unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val) {
143
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
144
- __HIP_MEMORY_SCOPE_SYSTEM);
145
- return compare;
146
- }
147
-
148
- __device__
149
- inline
150
- unsigned long atomicCAS(unsigned long* address, unsigned long compare, unsigned long val) {
151
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
152
- __HIP_MEMORY_SCOPE_AGENT);
153
- return compare;
154
- }
155
-
156
- __device__
157
- inline
158
- unsigned long atomicCAS_system(unsigned long* address, unsigned long compare, unsigned long val) {
159
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
160
- __HIP_MEMORY_SCOPE_SYSTEM);
161
- return compare;
162
- }
163
-
164
- __device__
165
- inline
166
- unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare,
167
- unsigned long long val) {
168
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
169
- __HIP_MEMORY_SCOPE_AGENT);
170
- return compare;
171
- }
172
-
173
- __device__
174
- inline
175
- unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare,
176
- unsigned long long val) {
177
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
178
- __HIP_MEMORY_SCOPE_SYSTEM);
179
- return compare;
180
- }
181
-
182
- __device__
183
- inline
184
- float atomicCAS(float* address, float compare, float val) {
185
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
186
- __HIP_MEMORY_SCOPE_AGENT);
187
- return compare;
188
- }
189
-
190
- __device__
191
- inline
192
- float atomicCAS_system(float* address, float compare, float val) {
193
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
194
- __HIP_MEMORY_SCOPE_SYSTEM);
195
- return compare;
196
- }
197
-
198
- __device__
199
- inline
200
- double atomicCAS(double* address, double compare, double val) {
201
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
202
- __HIP_MEMORY_SCOPE_AGENT);
203
- return compare;
204
- }
205
-
206
- __device__
207
- inline
208
- double atomicCAS_system(double* address, double compare, double val) {
209
- __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
210
- __HIP_MEMORY_SCOPE_SYSTEM);
211
- return compare;
212
- }
213
-
214
- __device__
215
- inline
216
- int atomicAdd(int* address, int val) {
217
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
218
- }
219
-
220
- __device__
221
- inline
222
- int atomicAdd_system(int* address, int val) {
223
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
224
- }
225
-
226
- __device__
227
- inline
228
- unsigned int atomicAdd(unsigned int* address, unsigned int val) {
229
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
230
- }
231
-
232
- __device__
233
- inline
234
- unsigned int atomicAdd_system(unsigned int* address, unsigned int val) {
235
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
236
- }
237
-
238
- __device__
239
- inline
240
- unsigned long atomicAdd(unsigned long* address, unsigned long val) {
241
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
242
- }
243
-
244
- __device__
245
- inline
246
- unsigned long atomicAdd_system(unsigned long* address, unsigned long val) {
247
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
248
- }
249
-
250
- __device__
251
- inline
252
- unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
253
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
254
- }
255
-
256
- __device__
257
- inline
258
- unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val) {
259
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
260
- }
261
-
262
- __device__
263
- inline
264
- float atomicAdd(float* address, float val) {
265
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
266
- return unsafeAtomicAdd(address, val);
267
- #else
268
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
269
- #endif
270
- }
271
-
272
- __device__
273
- inline
274
- float atomicAdd_system(float* address, float val) {
275
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
276
- }
277
-
278
- #if !defined(__HIPCC_RTC__)
279
- DEPRECATED("use atomicAdd instead")
280
- #endif // !defined(__HIPCC_RTC__)
281
- __device__
282
- inline
283
- void atomicAddNoRet(float* address, float val)
284
- {
285
- __ockl_atomic_add_noret_f32(address, val);
286
- }
287
-
288
- __device__
289
- inline
290
- double atomicAdd(double* address, double val) {
291
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
292
- return unsafeAtomicAdd(address, val);
293
- #else
294
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
295
- #endif
296
- }
297
-
298
- __device__
299
- inline
300
- double atomicAdd_system(double* address, double val) {
301
- return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
302
- }
303
-
304
- __device__
305
- inline
306
- int atomicSub(int* address, int val) {
307
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
308
- }
309
-
310
- __device__
311
- inline
312
- int atomicSub_system(int* address, int val) {
313
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
314
- }
315
-
316
- __device__
317
- inline
318
- unsigned int atomicSub(unsigned int* address, unsigned int val) {
319
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
320
- }
321
-
322
- __device__
323
- inline
324
- unsigned int atomicSub_system(unsigned int* address, unsigned int val) {
325
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
326
- }
327
-
328
- __device__
329
- inline
330
- unsigned long atomicSub(unsigned long* address, unsigned long val) {
331
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
332
- }
333
-
334
- __device__
335
- inline
336
- unsigned long atomicSub_system(unsigned long* address, unsigned long val) {
337
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
338
- }
339
-
340
- __device__
341
- inline
342
- unsigned long long atomicSub(unsigned long long* address, unsigned long long val) {
343
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
344
- }
345
-
346
- __device__
347
- inline
348
- unsigned long long atomicSub_system(unsigned long long* address, unsigned long long val) {
349
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
350
- }
351
-
352
- __device__
353
- inline
354
- float atomicSub(float* address, float val) {
355
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
356
- return unsafeAtomicAdd(address, -val);
357
- #else
358
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
359
- #endif
360
- }
361
-
362
- __device__
363
- inline
364
- float atomicSub_system(float* address, float val) {
365
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
366
- }
367
-
368
- __device__
369
- inline
370
- double atomicSub(double* address, double val) {
371
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
372
- return unsafeAtomicAdd(address, -val);
373
- #else
374
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
375
- #endif
376
- }
377
-
378
- __device__
379
- inline
380
- double atomicSub_system(double* address, double val) {
381
- return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
382
- }
383
-
384
- __device__
385
- inline
386
- int atomicExch(int* address, int val) {
387
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
388
- }
389
-
390
- __device__
391
- inline
392
- int atomicExch_system(int* address, int val) {
393
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
394
- }
395
-
396
- __device__
397
- inline
398
- unsigned int atomicExch(unsigned int* address, unsigned int val) {
399
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
400
- }
401
-
402
- __device__
403
- inline
404
- unsigned int atomicExch_system(unsigned int* address, unsigned int val) {
405
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
406
- }
407
-
408
- __device__
409
- inline
410
- unsigned long atomicExch(unsigned long* address, unsigned long val) {
411
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
412
- }
413
-
414
- __device__
415
- inline
416
- unsigned long atomicExch_system(unsigned long* address, unsigned long val) {
417
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
418
- }
419
-
420
- __device__
421
- inline
422
- unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
423
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
424
- }
425
-
426
- __device__
427
- inline
428
- unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) {
429
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
430
- }
431
-
432
- __device__
433
- inline
434
- float atomicExch(float* address, float val) {
435
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
436
- }
437
-
438
- __device__
439
- inline
440
- float atomicExch_system(float* address, float val) {
441
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
442
- }
443
-
444
- __device__
445
- inline
446
- double atomicExch(double* address, double val) {
447
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
448
- }
449
-
450
- __device__
451
- inline
452
- double atomicExch_system(double* address, double val) {
453
- return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
454
- }
455
-
456
- __device__
457
- inline
458
- int atomicMin(int* address, int val) {
459
- #if defined(__gfx941__)
460
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
461
- address, val, [](int x, int y) { return x < y; }, [=]() {
462
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
463
- __HIP_MEMORY_SCOPE_AGENT);
464
- });
465
- #else
466
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
467
- #endif // __gfx941__
468
- }
469
-
470
- __device__
471
- inline
472
- int atomicMin_system(int* address, int val) {
473
- #if defined(__gfx941__)
474
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
475
- address, val, [](int x, int y) { return x < y; }, [=]() {
476
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
477
- __HIP_MEMORY_SCOPE_SYSTEM);
478
- });
479
- #else
480
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
481
- #endif // __gfx941__
482
- }
483
-
484
- __device__
485
- inline
486
- unsigned int atomicMin(unsigned int* address, unsigned int val) {
487
- #if defined(__gfx941__)
488
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
489
- address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
490
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
491
- __HIP_MEMORY_SCOPE_AGENT);
492
- });
493
- #else
494
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
495
- #endif // __gfx941__
496
-
497
- }
498
-
499
- __device__
500
- inline
501
- unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
502
- #if defined(__gfx941__)
503
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
504
- address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
505
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
506
- __HIP_MEMORY_SCOPE_SYSTEM);
507
- });
508
- #else
509
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
510
- #endif // __gfx941__
511
- }
512
-
513
- __device__
514
- inline
515
- unsigned long long atomicMin(unsigned long* address, unsigned long val) {
516
- #if defined(__gfx941__)
517
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
518
- address,
519
- val,
520
- [](unsigned long x, unsigned long y) { return x < y; },
521
- [=]() {
522
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
523
- __HIP_MEMORY_SCOPE_AGENT);
524
- });
525
- #else
526
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
527
- #endif // __gfx941__
528
- }
529
-
530
- __device__
531
- inline
532
- unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
533
- #if defined(__gfx941__)
534
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
535
- address,
536
- val,
537
- [](unsigned long x, unsigned long y) { return x < y; },
538
- [=]() {
539
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
540
- __HIP_MEMORY_SCOPE_SYSTEM);
541
- });
542
- #else
543
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
544
- #endif // __gfx941__
545
- }
546
-
547
- __device__
548
- inline
549
- unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
550
- #if defined(__gfx941__)
551
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
552
- address,
553
- val,
554
- [](unsigned long long x, unsigned long long y) { return x < y; },
555
- [=]() {
556
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
557
- __HIP_MEMORY_SCOPE_AGENT);
558
- });
559
- #else
560
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
561
- #endif // __gfx941__
562
- }
563
-
564
- __device__
565
- inline
566
- unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
567
- #if defined(__gfx941__)
568
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
569
- address,
570
- val,
571
- [](unsigned long long x, unsigned long long y) { return x < y; },
572
- [=]() {
573
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
574
- __HIP_MEMORY_SCOPE_SYSTEM);
575
- });
576
- #else
577
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
578
- #endif // __gfx941__
579
- }
580
-
581
- __device__
582
- inline
583
- long long atomicMin(long long* address, long long val) {
584
- #if defined(__gfx941__)
585
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
586
- address, val, [](long long x, long long y) { return x < y; },
587
- [=]() {
588
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
589
- });
590
- #else
591
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
592
- #endif // __gfx941__
593
- }
594
-
595
- __device__
596
- inline
597
- long long atomicMin_system(long long* address, long long val) {
598
- #if defined(__gfx941__)
599
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
600
- address, val, [](long long x, long long y) { return x < y; },
601
- [=]() {
602
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
603
- });
604
- #else
605
- return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
606
- #endif // __gfx941__
607
- }
608
-
609
- __device__
610
- inline
611
- float atomicMin(float* addr, float val) {
612
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
613
- return unsafeAtomicMin(addr, val);
614
- #else
615
- #if __has_builtin(__hip_atomic_load) && \
616
- __has_builtin(__hip_atomic_compare_exchange_strong)
617
- float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
618
- bool done = false;
619
- while (!done && value > val) {
620
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
621
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
622
- }
623
- return value;
624
- #else
625
- unsigned int *uaddr = (unsigned int *)addr;
626
- unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
627
- bool done = false;
628
- while (!done && __uint_as_float(value) > val) {
629
- done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
630
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
631
- }
632
- return __uint_as_float(value);
633
- #endif
634
- #endif
635
- }
636
-
637
- __device__
638
- inline
639
- float atomicMin_system(float* address, float val) {
640
- unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
641
- #if __has_builtin(__hip_atomic_load)
642
- unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
643
- #else
644
- unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
645
- #endif
646
- float value = __uint_as_float(tmp);
647
-
648
- while (val < value) {
649
- value = atomicCAS_system(address, value, val);
650
- }
651
-
652
- return value;
653
- }
654
-
655
- __device__
656
- inline
657
- double atomicMin(double* addr, double val) {
658
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
659
- return unsafeAtomicMin(addr, val);
660
- #else
661
- #if __has_builtin(__hip_atomic_load) && \
662
- __has_builtin(__hip_atomic_compare_exchange_strong)
663
- double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
664
- bool done = false;
665
- while (!done && value > val) {
666
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
667
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
668
- }
669
- return value;
670
- #else
671
- unsigned long long *uaddr = (unsigned long long *)addr;
672
- unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
673
- bool done = false;
674
- while (!done && __longlong_as_double(value) > val) {
675
- done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
676
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
677
- }
678
- return __longlong_as_double(value);
679
- #endif
680
- #endif
681
- }
682
-
683
- __device__
684
- inline
685
- double atomicMin_system(double* address, double val) {
686
- unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
687
- #if __has_builtin(__hip_atomic_load)
688
- unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
689
- #else
690
- unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
691
- #endif
692
- double value = __longlong_as_double(tmp);
693
-
694
- while (val < value) {
695
- value = atomicCAS_system(address, value, val);
696
- }
697
-
698
- return value;
699
- }
700
-
701
- __device__
702
- inline
703
- int atomicMax(int* address, int val) {
704
- #if defined(__gfx941__)
705
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
706
- address, val, [](int x, int y) { return y < x; }, [=]() {
707
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
708
- __HIP_MEMORY_SCOPE_AGENT);
709
- });
710
- #else
711
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
712
- #endif // __gfx941__
713
- }
714
-
715
- __device__
716
- inline
717
- int atomicMax_system(int* address, int val) {
718
- #if defined(__gfx941__)
719
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
720
- address, val, [](int x, int y) { return y < x; }, [=]() {
721
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
722
- __HIP_MEMORY_SCOPE_SYSTEM);
723
- });
724
- #else
725
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
726
- #endif // __gfx941__
727
- }
728
-
729
- __device__
730
- inline
731
- unsigned int atomicMax(unsigned int* address, unsigned int val) {
732
- #if defined(__gfx941__)
733
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
734
- address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
735
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
736
- __HIP_MEMORY_SCOPE_AGENT);
737
- });
738
- #else
739
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
740
- #endif // __gfx941__
741
- }
742
-
743
- __device__
744
- inline
745
- unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
746
- #if defined(__gfx941__)
747
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
748
- address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
749
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
750
- __HIP_MEMORY_SCOPE_SYSTEM);
751
- });
752
- #else
753
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
754
- #endif // __gfx941__
755
- }
756
-
757
- __device__
758
- inline
759
- unsigned long atomicMax(unsigned long* address, unsigned long val) {
760
- #if defined(__gfx941__)
761
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
762
- address,
763
- val,
764
- [](unsigned long x, unsigned long y) { return y < x; },
765
- [=]() {
766
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
767
- __HIP_MEMORY_SCOPE_AGENT);
768
- });
769
- #else
770
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
771
- #endif // __gfx941__
772
- }
773
-
774
- __device__
775
- inline
776
- unsigned long atomicMax_system(unsigned long* address, unsigned long val) {
777
- #if defined(__gfx941__)
778
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
779
- address,
780
- val,
781
- [](unsigned long x, unsigned long y) { return y < x; },
782
- [=]() {
783
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
784
- __HIP_MEMORY_SCOPE_SYSTEM);
785
- });
786
- #else
787
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
788
- #endif // __gfx941__
789
- }
790
-
791
- __device__
792
- inline
793
- unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
794
- #if defined(__gfx941__)
795
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
796
- address,
797
- val,
798
- [](unsigned long long x, unsigned long long y) { return y < x; },
799
- [=]() {
800
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
801
- __HIP_MEMORY_SCOPE_AGENT);
802
- });
803
- #else
804
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
805
- #endif // __gfx941__
806
- }
807
-
808
- __device__
809
- inline
810
- unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
811
- #if defined(__gfx941__)
812
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
813
- address,
814
- val,
815
- [](unsigned long long x, unsigned long long y) { return y < x; },
816
- [=]() {
817
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
818
- __HIP_MEMORY_SCOPE_SYSTEM);
819
- });
820
- #else
821
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
822
- #endif // __gfx941__
823
- }
824
-
825
- __device__
826
- inline
827
- long long atomicMax(long long* address, long long val) {
828
- #if defined(__gfx941__)
829
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
830
- address, val, [](long long x, long long y) { return y < x; },
831
- [=]() {
832
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
833
- });
834
- #else
835
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
836
- #endif // __gfx941__
837
- }
838
-
839
- __device__
840
- inline
841
- long long atomicMax_system(long long* address, long long val) {
842
- #if defined(__gfx941__)
843
- return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
844
- address, val, [](long long x, long long y) { return y < x; },
845
- [=]() {
846
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
847
- });
848
- #else
849
- return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
850
- #endif // __gfx941__
851
- }
852
-
853
- __device__
854
- inline
855
- float atomicMax(float* addr, float val) {
856
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
857
- return unsafeAtomicMax(addr, val);
858
- #else
859
- #if __has_builtin(__hip_atomic_load) && \
860
- __has_builtin(__hip_atomic_compare_exchange_strong)
861
- float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
862
- bool done = false;
863
- while (!done && value < val) {
864
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
865
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
866
- }
867
- return value;
868
- #else
869
- unsigned int *uaddr = (unsigned int *)addr;
870
- unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
871
- bool done = false;
872
- while (!done && __uint_as_float(value) < val) {
873
- done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
874
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
875
- }
876
- return __uint_as_float(value);
877
- #endif
878
- #endif
879
- }
880
-
881
- __device__
882
- inline
883
- float atomicMax_system(float* address, float val) {
884
- unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
885
- #if __has_builtin(__hip_atomic_load)
886
- unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
887
- #else
888
- unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
889
- #endif
890
- float value = __uint_as_float(tmp);
891
-
892
- while (value < val) {
893
- value = atomicCAS_system(address, value, val);
894
- }
895
-
896
- return value;
897
- }
898
-
899
- __device__
900
- inline
901
- double atomicMax(double* addr, double val) {
902
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
903
- return unsafeAtomicMax(addr, val);
904
- #else
905
- #if __has_builtin(__hip_atomic_load) && \
906
- __has_builtin(__hip_atomic_compare_exchange_strong)
907
- double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
908
- bool done = false;
909
- while (!done && value < val) {
910
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
911
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
912
- }
913
- return value;
914
- #else
915
- unsigned long long *uaddr = (unsigned long long *)addr;
916
- unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
917
- bool done = false;
918
- while (!done && __longlong_as_double(value) < val) {
919
- done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
920
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
921
- }
922
- return __longlong_as_double(value);
923
- #endif
924
- #endif
925
- }
926
-
927
- __device__
928
- inline
929
- double atomicMax_system(double* address, double val) {
930
- unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
931
- #if __has_builtin(__hip_atomic_load)
932
- unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
933
- #else
934
- unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
935
- #endif
936
- double value = __longlong_as_double(tmp);
937
-
938
- while (value < val) {
939
- value = atomicCAS_system(address, value, val);
940
- }
941
-
942
- return value;
943
- }
944
-
945
- __device__
946
- inline
947
- unsigned int atomicInc(unsigned int* address, unsigned int val)
948
- {
949
- #if defined(__gfx941__)
950
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
951
- address,
952
- val,
953
- [](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
954
- [=]() {
955
- return
956
- __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
957
- });
958
- #else
959
- return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
960
- #endif // __gfx941__
961
-
962
- }
963
-
964
- __device__
965
- inline
966
- unsigned int atomicDec(unsigned int* address, unsigned int val)
967
- {
968
- #if defined(__gfx941__)
969
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
970
- address,
971
- val,
972
- [](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
973
- [=]() {
974
- return
975
- __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
976
- });
977
- #else
978
- return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
979
- #endif // __gfx941__
980
-
981
- }
982
-
983
- __device__
984
- inline
985
- int atomicAnd(int* address, int val) {
986
- #if defined(__gfx941__)
987
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
988
- address, val, [](int& x, int y) { x &= y; }, [=]() {
989
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
990
- __HIP_MEMORY_SCOPE_AGENT);
991
- });
992
- #else
993
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
994
- #endif // __gfx941__
995
- }
996
-
997
- __device__
998
- inline
999
- int atomicAnd_system(int* address, int val) {
1000
- #if defined(__gfx941__)
1001
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1002
- address, val, [](int& x, int y) { x &= y; }, [=]() {
1003
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1004
- __HIP_MEMORY_SCOPE_SYSTEM);
1005
- });
1006
- #else
1007
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1008
- #endif // __gfx941__
1009
- }
1010
-
1011
- __device__
1012
- inline
1013
- unsigned int atomicAnd(unsigned int* address, unsigned int val) {
1014
- #if defined(__gfx941__)
1015
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1016
- address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
1017
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1018
- __HIP_MEMORY_SCOPE_AGENT);
1019
- });
1020
- #else
1021
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1022
- #endif // __gfx941__
1023
- }
1024
-
1025
- __device__
1026
- inline
1027
- unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
1028
- #if defined(__gfx941__)
1029
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1030
- address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
1031
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1032
- __HIP_MEMORY_SCOPE_SYSTEM);
1033
- });
1034
- #else
1035
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1036
- #endif // __gfx941__
1037
- }
1038
-
1039
- __device__
1040
- inline
1041
- unsigned long atomicAnd(unsigned long* address, unsigned long val) {
1042
- #if defined(__gfx941__)
1043
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1044
- address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
1045
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1046
- __HIP_MEMORY_SCOPE_AGENT);
1047
- });
1048
- #else
1049
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1050
- #endif // __gfx941__
1051
- }
1052
-
1053
- __device__
1054
- inline
1055
- unsigned long atomicAnd_system(unsigned long* address, unsigned long val) {
1056
- #if defined(__gfx941__)
1057
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1058
- address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
1059
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1060
- __HIP_MEMORY_SCOPE_SYSTEM);
1061
- });
1062
- #else
1063
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1064
- #endif // __gfx941__
1065
- }
1066
-
1067
- __device__
1068
- inline
1069
- unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
1070
- #if defined(__gfx941__)
1071
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1072
- address,
1073
- val,
1074
- [](unsigned long long& x, unsigned long long y) { x &= y; },
1075
- [=]() {
1076
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1077
- __HIP_MEMORY_SCOPE_AGENT);
1078
- });
1079
- #else
1080
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1081
- #endif // __gfx941__
1082
- }
1083
-
1084
- __device__
1085
- inline
1086
- unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
1087
- #if defined(__gfx941__)
1088
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1089
- address,
1090
- val,
1091
- [](unsigned long long& x, unsigned long long y) { x &= y; },
1092
- [=]() {
1093
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
1094
- __HIP_MEMORY_SCOPE_SYSTEM);
1095
- });
1096
- #else
1097
- return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1098
- #endif // __gfx941__
1099
- }
1100
-
1101
- __device__
1102
- inline
1103
- int atomicOr(int* address, int val) {
1104
- #if defined(__gfx941__)
1105
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1106
- address, val, [](int& x, int y) { x |= y; }, [=]() {
1107
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1108
- __HIP_MEMORY_SCOPE_AGENT);
1109
- });
1110
- #else
1111
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1112
- #endif // __gfx941__
1113
- }
1114
-
1115
- __device__
1116
- inline
1117
- int atomicOr_system(int* address, int val) {
1118
- #if defined(__gfx941__)
1119
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1120
- address, val, [](int& x, int y) { x |= y; }, [=]() {
1121
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1122
- __HIP_MEMORY_SCOPE_SYSTEM);
1123
- });
1124
- #else
1125
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1126
- #endif // __gfx941__
1127
- }
1128
-
1129
- __device__
1130
- inline
1131
- unsigned int atomicOr(unsigned int* address, unsigned int val) {
1132
- #if defined(__gfx941__)
1133
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1134
- address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
1135
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1136
- __HIP_MEMORY_SCOPE_AGENT);
1137
- });
1138
- #else
1139
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1140
- #endif // __gfx941__
1141
- }
1142
-
1143
- __device__
1144
- inline
1145
- unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
1146
- #if defined(__gfx941__)
1147
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1148
- address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
1149
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1150
- __HIP_MEMORY_SCOPE_SYSTEM);
1151
- });
1152
- #else
1153
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1154
- #endif // __gfx941__
1155
- }
1156
-
1157
- __device__
1158
- inline
1159
- unsigned long atomicOr(unsigned long* address, unsigned long val) {
1160
- #if defined(__gfx941__)
1161
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1162
- address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
1163
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1164
- __HIP_MEMORY_SCOPE_AGENT);
1165
- });
1166
- #else
1167
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1168
- #endif // __gfx941__
1169
- }
1170
-
1171
- __device__
1172
- inline
1173
- unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
1174
- #if defined(__gfx941__)
1175
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1176
- address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
1177
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1178
- __HIP_MEMORY_SCOPE_SYSTEM);
1179
- });
1180
- #else
1181
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1182
- #endif // __gfx941__
1183
- }
1184
-
1185
- __device__
1186
- inline
1187
- unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
1188
- #if defined(__gfx941__)
1189
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1190
- address,
1191
- val,
1192
- [](unsigned long long& x, unsigned long long y) { x |= y; },
1193
- [=]() {
1194
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1195
- __HIP_MEMORY_SCOPE_AGENT);
1196
- });
1197
- #else
1198
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1199
- #endif // __gfx941__
1200
- }
1201
-
1202
- __device__
1203
- inline
1204
- unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
1205
- #if defined(__gfx941__)
1206
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1207
- address,
1208
- val,
1209
- [](unsigned long long& x, unsigned long long y) { x |= y; },
1210
- [=]() {
1211
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
1212
- __HIP_MEMORY_SCOPE_SYSTEM);
1213
- });
1214
- #else
1215
- return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1216
- #endif // __gfx941__
1217
- }
1218
-
1219
- __device__
1220
- inline
1221
- int atomicXor(int* address, int val) {
1222
- #if defined(__gfx941__)
1223
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1224
- address, val, [](int& x, int y) { x ^= y; }, [=]() {
1225
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1226
- __HIP_MEMORY_SCOPE_AGENT);
1227
- });
1228
- #else
1229
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1230
- #endif // __gfx941__
1231
- }
1232
-
1233
- __device__
1234
- inline
1235
- int atomicXor_system(int* address, int val) {
1236
- #if defined(__gfx941__)
1237
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1238
- address, val, [](int& x, int y) { x ^= y; }, [=]() {
1239
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1240
- __HIP_MEMORY_SCOPE_SYSTEM);
1241
- });
1242
- #else
1243
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1244
- #endif // __gfx941__
1245
- }
1246
-
1247
- __device__
1248
- inline
1249
- unsigned int atomicXor(unsigned int* address, unsigned int val) {
1250
- #if defined(__gfx941__)
1251
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1252
- address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
1253
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1254
- __HIP_MEMORY_SCOPE_AGENT);
1255
- });
1256
- #else
1257
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1258
- #endif // __gfx941__
1259
- }
1260
-
1261
- __device__
1262
- inline
1263
- unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
1264
- #if defined(__gfx941__)
1265
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1266
- address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
1267
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1268
- __HIP_MEMORY_SCOPE_SYSTEM);
1269
- });
1270
- #else
1271
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1272
- #endif // __gfx941__
1273
- }
1274
-
1275
- __device__
1276
- inline
1277
- unsigned long atomicXor(unsigned long* address, unsigned long val) {
1278
- #if defined(__gfx941__)
1279
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1280
- address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
1281
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1282
- __HIP_MEMORY_SCOPE_AGENT);
1283
- });
1284
- #else
1285
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1286
- #endif // __gfx941__
1287
- }
1288
-
1289
- __device__
1290
- inline
1291
- unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
1292
- #if defined(__gfx941__)
1293
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
1294
- address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
1295
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1296
- __HIP_MEMORY_SCOPE_SYSTEM);
1297
- });
1298
- #else
1299
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1300
- #endif // __gfx941__
1301
- }
1302
-
1303
- __device__
1304
- inline
1305
- unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
1306
- #if defined(__gfx941__)
1307
- return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
1308
- address,
1309
- val,
1310
- [](unsigned long long& x, unsigned long long y) { x ^= y; },
1311
- [=]() {
1312
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
1313
- __HIP_MEMORY_SCOPE_AGENT);
1314
- });
1315
- #else
1316
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
1317
- #endif // __gfx941__
1318
- }
1319
-
1320
- __device__
1321
- inline
1322
- unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) {
1323
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
1324
- }
1325
-
1326
- #else // __hip_atomic_compare_exchange_strong
1327
-
1328
- __device__
1329
- inline
1330
- int atomicCAS(int* address, int compare, int val)
1331
- {
1332
- __atomic_compare_exchange_n(
1333
- address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
1334
-
1335
- return compare;
1336
- }
1337
- __device__
1338
- inline
1339
- unsigned int atomicCAS(
1340
- unsigned int* address, unsigned int compare, unsigned int val)
1341
- {
1342
- __atomic_compare_exchange_n(
1343
- address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
1344
-
1345
- return compare;
1346
- }
1347
- __device__
1348
- inline
1349
- unsigned long long atomicCAS(
1350
- unsigned long long* address,
1351
- unsigned long long compare,
1352
- unsigned long long val)
1353
- {
1354
- __atomic_compare_exchange_n(
1355
- address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
1356
-
1357
- return compare;
1358
- }
1359
-
1360
- __device__
1361
- inline
1362
- int atomicAdd(int* address, int val)
1363
- {
1364
- return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1365
- }
1366
- __device__
1367
- inline
1368
- unsigned int atomicAdd(unsigned int* address, unsigned int val)
1369
- {
1370
- return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1371
- }
1372
- __device__
1373
- inline
1374
- unsigned long long atomicAdd(
1375
- unsigned long long* address, unsigned long long val)
1376
- {
1377
- return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1378
- }
1379
- __device__
1380
- inline
1381
- float atomicAdd(float* address, float val)
1382
- {
1383
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
1384
- return unsafeAtomicAdd(address, val);
1385
- #else
1386
- return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1387
- #endif
1388
- }
1389
-
1390
- #if !defined(__HIPCC_RTC__)
1391
- DEPRECATED("use atomicAdd instead")
1392
- #endif // !defined(__HIPCC_RTC__)
1393
- __device__
1394
- inline
1395
- void atomicAddNoRet(float* address, float val)
1396
- {
1397
- __ockl_atomic_add_noret_f32(address, val);
1398
- }
1399
-
1400
- __device__
1401
- inline
1402
- double atomicAdd(double* address, double val)
1403
- {
1404
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
1405
- return unsafeAtomicAdd(address, val);
1406
- #else
1407
- return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
1408
- #endif
1409
- }
1410
-
1411
- __device__
1412
- inline
1413
- int atomicSub(int* address, int val)
1414
- {
1415
- return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
1416
- }
1417
- __device__
1418
- inline
1419
- unsigned int atomicSub(unsigned int* address, unsigned int val)
1420
- {
1421
- return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
1422
- }
1423
-
1424
- __device__
1425
- inline
1426
- int atomicExch(int* address, int val)
1427
- {
1428
- return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
1429
- }
1430
- __device__
1431
- inline
1432
- unsigned int atomicExch(unsigned int* address, unsigned int val)
1433
- {
1434
- return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
1435
- }
1436
- __device__
1437
- inline
1438
- unsigned long long atomicExch(unsigned long long* address, unsigned long long val)
1439
- {
1440
- return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
1441
- }
1442
- __device__
1443
- inline
1444
- float atomicExch(float* address, float val)
1445
- {
1446
- return __uint_as_float(__atomic_exchange_n(
1447
- reinterpret_cast<unsigned int*>(address),
1448
- __float_as_uint(val),
1449
- __ATOMIC_RELAXED));
1450
- }
1451
-
1452
- __device__
1453
- inline
1454
- int atomicMin(int* address, int val)
1455
- {
1456
- return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
1457
- }
1458
- __device__
1459
- inline
1460
- unsigned int atomicMin(unsigned int* address, unsigned int val)
1461
- {
1462
- return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
1463
- }
1464
- __device__
1465
- inline
1466
- unsigned long long atomicMin(
1467
- unsigned long long* address, unsigned long long val)
1468
- {
1469
- unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1470
- while (val < tmp) {
1471
- const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1472
-
1473
- if (tmp1 != tmp) { tmp = tmp1; continue; }
1474
-
1475
- tmp = atomicCAS(address, tmp, val);
1476
- }
1477
-
1478
- return tmp;
1479
- }
1480
- __device__ inline long long atomicMin(long long* address, long long val) {
1481
- long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1482
- while (val < tmp) {
1483
- const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1484
-
1485
- if (tmp1 != tmp) {
1486
- tmp = tmp1;
1487
- continue;
1488
- }
1489
-
1490
- tmp = atomicCAS(address, tmp, val);
1491
- }
1492
- return tmp;
1493
- }
1494
-
1495
- __device__
1496
- inline
1497
- int atomicMax(int* address, int val)
1498
- {
1499
- return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
1500
- }
1501
- __device__
1502
- inline
1503
- unsigned int atomicMax(unsigned int* address, unsigned int val)
1504
- {
1505
- return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
1506
- }
1507
- __device__
1508
- inline
1509
- unsigned long long atomicMax(
1510
- unsigned long long* address, unsigned long long val)
1511
- {
1512
- unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1513
- while (tmp < val) {
1514
- const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1515
-
1516
- if (tmp1 != tmp) { tmp = tmp1; continue; }
1517
-
1518
- tmp = atomicCAS(address, tmp, val);
1519
- }
1520
-
1521
- return tmp;
1522
- }
1523
- __device__ inline long long atomicMax(long long* address, long long val) {
1524
- long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
1525
- while (tmp < val) {
1526
- const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
1527
-
1528
- if (tmp1 != tmp) {
1529
- tmp = tmp1;
1530
- continue;
1531
- }
1532
-
1533
- tmp = atomicCAS(address, tmp, val);
1534
- }
1535
- return tmp;
1536
- }
1537
-
1538
- __device__
1539
- inline
1540
- unsigned int atomicInc(unsigned int* address, unsigned int val)
1541
- {
1542
- return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
1543
- }
1544
-
1545
- __device__
1546
- inline
1547
- unsigned int atomicDec(unsigned int* address, unsigned int val)
1548
- {
1549
- return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
1550
- }
1551
-
1552
- __device__
1553
- inline
1554
- int atomicAnd(int* address, int val)
1555
- {
1556
- return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
1557
- }
1558
- __device__
1559
- inline
1560
- unsigned int atomicAnd(unsigned int* address, unsigned int val)
1561
- {
1562
- return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
1563
- }
1564
- __device__
1565
- inline
1566
- unsigned long long atomicAnd(
1567
- unsigned long long* address, unsigned long long val)
1568
- {
1569
- return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
1570
- }
1571
-
1572
- __device__
1573
- inline
1574
- int atomicOr(int* address, int val)
1575
- {
1576
- return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
1577
- }
1578
- __device__
1579
- inline
1580
- unsigned int atomicOr(unsigned int* address, unsigned int val)
1581
- {
1582
- return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
1583
- }
1584
- __device__
1585
- inline
1586
- unsigned long long atomicOr(
1587
- unsigned long long* address, unsigned long long val)
1588
- {
1589
- return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
1590
- }
1591
-
1592
- __device__
1593
- inline
1594
- int atomicXor(int* address, int val)
1595
- {
1596
- return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
1597
- }
1598
- __device__
1599
- inline
1600
- unsigned int atomicXor(unsigned int* address, unsigned int val)
1601
- {
1602
- return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
1603
- }
1604
- __device__
1605
- inline
1606
- unsigned long long atomicXor(
1607
- unsigned long long* address, unsigned long long val)
1608
- {
1609
- return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
1610
- }
1611
-
1612
- #endif // __hip_atomic_compare_exchange_strong