triton-windows 3.3.0.post19__cp39-cp39-win_amd64.whl → 3.4.0.post20__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (173) hide show
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +4 -1
  3. triton/_filecheck.py +87 -0
  4. triton/_internal_testing.py +26 -15
  5. triton/_utils.py +110 -21
  6. triton/backends/__init__.py +20 -23
  7. triton/backends/amd/__init__.py +0 -0
  8. triton/backends/amd/compiler.py +112 -78
  9. triton/backends/amd/driver.c +5 -2
  10. triton/backends/amd/driver.py +149 -47
  11. triton/backends/compiler.py +7 -21
  12. triton/backends/nvidia/bin/ptxas.exe +0 -0
  13. triton/backends/nvidia/compiler.py +92 -93
  14. triton/backends/nvidia/driver.c +90 -98
  15. triton/backends/nvidia/driver.py +303 -128
  16. triton/compiler/code_generator.py +212 -111
  17. triton/compiler/compiler.py +110 -25
  18. triton/experimental/__init__.py +0 -0
  19. triton/experimental/gluon/__init__.py +4 -0
  20. triton/experimental/gluon/_compiler.py +0 -0
  21. triton/experimental/gluon/_runtime.py +99 -0
  22. triton/experimental/gluon/language/__init__.py +18 -0
  23. triton/experimental/gluon/language/_core.py +312 -0
  24. triton/experimental/gluon/language/_layouts.py +230 -0
  25. triton/experimental/gluon/language/_math.py +12 -0
  26. triton/experimental/gluon/language/_semantic.py +287 -0
  27. triton/experimental/gluon/language/_standard.py +47 -0
  28. triton/experimental/gluon/language/nvidia/__init__.py +4 -0
  29. triton/experimental/gluon/language/nvidia/blackwell/__init__.py +202 -0
  30. triton/experimental/gluon/language/nvidia/blackwell/tma.py +32 -0
  31. triton/experimental/gluon/language/nvidia/hopper/__init__.py +11 -0
  32. triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +51 -0
  33. triton/experimental/gluon/language/nvidia/hopper/tma.py +96 -0
  34. triton/experimental/gluon/nvidia/__init__.py +4 -0
  35. triton/experimental/gluon/nvidia/blackwell.py +3 -0
  36. triton/experimental/gluon/nvidia/hopper.py +40 -0
  37. triton/knobs.py +481 -0
  38. triton/language/__init__.py +39 -14
  39. triton/language/core.py +794 -537
  40. triton/language/extra/cuda/__init__.py +10 -7
  41. triton/language/extra/cuda/gdc.py +42 -0
  42. triton/language/extra/cuda/libdevice.py +394 -394
  43. triton/language/extra/cuda/utils.py +21 -21
  44. triton/language/extra/hip/libdevice.py +113 -104
  45. triton/language/math.py +65 -66
  46. triton/language/random.py +12 -2
  47. triton/language/semantic.py +1706 -1770
  48. triton/language/standard.py +116 -51
  49. triton/runtime/autotuner.py +117 -59
  50. triton/runtime/build.py +76 -12
  51. triton/runtime/cache.py +18 -47
  52. triton/runtime/driver.py +32 -29
  53. triton/runtime/interpreter.py +72 -35
  54. triton/runtime/jit.py +146 -110
  55. triton/runtime/tcc/lib/python310.def +1610 -0
  56. triton/runtime/tcc/lib/python311.def +1633 -0
  57. triton/runtime/tcc/lib/python312.def +1703 -0
  58. triton/runtime/tcc/lib/python313.def +1651 -0
  59. triton/runtime/tcc/lib/python313t.def +1656 -0
  60. triton/runtime/tcc/lib/python39.def +1644 -0
  61. triton/runtime/tcc/lib/python3t.def +905 -0
  62. triton/testing.py +16 -12
  63. triton/tools/disasm.py +3 -4
  64. triton/tools/tensor_descriptor.py +36 -0
  65. triton/windows_utils.py +14 -6
  66. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/METADATA +7 -2
  67. triton_windows-3.4.0.post20.dist-info/RECORD +186 -0
  68. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/WHEEL +1 -1
  69. triton_windows-3.4.0.post20.dist-info/entry_points.txt +3 -0
  70. triton_windows-3.4.0.post20.dist-info/licenses/LICENSE +23 -0
  71. triton_windows-3.4.0.post20.dist-info/top_level.txt +1 -0
  72. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  73. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
  74. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
  75. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
  76. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  77. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  78. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  79. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
  80. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  81. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
  82. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  83. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  84. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  85. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  86. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  87. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  88. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  89. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  90. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
  91. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
  92. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  93. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  94. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  95. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  96. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  97. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  98. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  99. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
  100. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  101. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  102. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  103. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  104. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  105. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
  106. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
  107. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
  108. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  109. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  110. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  111. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  112. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  113. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  114. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  115. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  116. triton/backends/amd/include/hip/device_functions.h +0 -38
  117. triton/backends/amd/include/hip/driver_types.h +0 -468
  118. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  119. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  120. triton/backends/amd/include/hip/hip_common.h +0 -100
  121. triton/backends/amd/include/hip/hip_complex.h +0 -38
  122. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  123. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  124. triton/backends/amd/include/hip/hip_ext.h +0 -161
  125. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  126. triton/backends/amd/include/hip/hip_fp8.h +0 -33
  127. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  128. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  129. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  130. triton/backends/amd/include/hip/hip_profile.h +0 -27
  131. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  132. triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
  133. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  134. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  135. triton/backends/amd/include/hip/hip_version.h +0 -17
  136. triton/backends/amd/include/hip/hiprtc.h +0 -421
  137. triton/backends/amd/include/hip/library_types.h +0 -78
  138. triton/backends/amd/include/hip/math_functions.h +0 -42
  139. triton/backends/amd/include/hip/surface_types.h +0 -63
  140. triton/backends/amd/include/hip/texture_types.h +0 -194
  141. triton/backends/amd/include/hsa/Brig.h +0 -1131
  142. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  143. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
  144. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  145. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  146. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  147. triton/backends/amd/include/hsa/hsa.h +0 -5738
  148. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  149. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
  150. triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
  151. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
  152. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  153. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  154. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  155. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  156. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
  157. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  158. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
  159. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
  160. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
  161. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  162. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  163. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  164. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  165. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  166. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  167. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  168. triton/backends/amd/include/roctracer/roctx.h +0 -229
  169. triton/language/_utils.py +0 -21
  170. triton/language/extra/cuda/_experimental_tma.py +0 -106
  171. triton/tools/experimental_descriptor.py +0 -32
  172. triton_windows-3.3.0.post19.dist-info/RECORD +0 -253
  173. triton_windows-3.3.0.post19.dist-info/top_level.txt +0 -14
@@ -1,2226 +0,0 @@
1
- /*
2
- Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved.
3
-
4
- Permission is hereby granted, free of charge, to any person obtaining a copy
5
- of this software and associated documentation files (the "Software"), to deal
6
- in the Software without restriction, including without limitation the rights
7
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
- copies of the Software, and to permit persons to whom the Software is
9
- furnished to do so, subject to the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be included in
12
- all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
- THE SOFTWARE.
21
- */
22
-
23
- /**
24
- * @file amd_detail/hip_vector_types.h
25
- * @brief Defines the different newt vector types for HIP runtime.
26
- */
27
-
28
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
29
- #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
30
-
31
- #include "hip/amd_detail/host_defines.h"
32
-
33
- #if defined(__HIPCC_RTC__)
34
- #define __HOST_DEVICE__ __device__
35
- #else
36
- #define __HOST_DEVICE__ __host__ __device__
37
- #endif
38
-
39
- #if defined(__has_attribute)
40
- #if __has_attribute(ext_vector_type)
41
- #define __HIP_USE_NATIVE_VECTOR__ 1
42
- #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
43
- #else
44
- #define __NATIVE_VECTOR__(n, T) T[n]
45
- #endif
46
-
47
- #if defined(__cplusplus)
48
- #if !defined(__HIPCC_RTC__)
49
- #include <array>
50
- #include <iosfwd>
51
- #include <type_traits>
52
- #else
53
- namespace std {
54
- using ::size_t;
55
-
56
- template <class _Tp, _Tp __v> struct integral_constant {
57
- static constexpr const _Tp value = __v;
58
- typedef _Tp value_type;
59
- typedef integral_constant type;
60
- constexpr operator value_type() const { return value; }
61
- constexpr value_type operator()() const { return value; }
62
- };
63
- template <class _Tp, _Tp __v> constexpr const _Tp integral_constant<_Tp, __v>::value;
64
-
65
- typedef integral_constant<bool, true> true_type;
66
- typedef integral_constant<bool, false> false_type;
67
-
68
- template <bool B> using bool_constant = integral_constant<bool, B>;
69
- typedef bool_constant<true> true_type;
70
- typedef bool_constant<false> false_type;
71
-
72
- template <bool __B, class __T = void> struct enable_if {};
73
- template <class __T> struct enable_if<true, __T> { typedef __T type; };
74
-
75
- template<bool _B> struct true_or_false_type : public false_type {};
76
- template<> struct true_or_false_type<true> : public true_type {};
77
-
78
- template <class _Tp> struct is_integral : public false_type {};
79
- template <> struct is_integral<bool> : public true_type {};
80
- template <> struct is_integral<char> : public true_type {};
81
- template <> struct is_integral<signed char> : public true_type {};
82
- template <> struct is_integral<unsigned char> : public true_type {};
83
- template <> struct is_integral<wchar_t> : public true_type {};
84
- template <> struct is_integral<short> : public true_type {};
85
- template <> struct is_integral<unsigned short> : public true_type {};
86
- template <> struct is_integral<int> : public true_type {};
87
- template <> struct is_integral<unsigned int> : public true_type {};
88
- template <> struct is_integral<long> : public true_type {};
89
- template <> struct is_integral<unsigned long> : public true_type {};
90
- template <> struct is_integral<long long> : public true_type {};
91
- template <> struct is_integral<unsigned long long> : public true_type {};
92
-
93
- template <class _Tp> struct is_arithmetic : public false_type {};
94
- template <> struct is_arithmetic<bool> : public true_type {};
95
- template <> struct is_arithmetic<char> : public true_type {};
96
- template <> struct is_arithmetic<signed char> : public true_type {};
97
- template <> struct is_arithmetic<unsigned char> : public true_type {};
98
- template <> struct is_arithmetic<wchar_t> : public true_type {};
99
- template <> struct is_arithmetic<short> : public true_type {};
100
- template <> struct is_arithmetic<unsigned short> : public true_type {};
101
- template <> struct is_arithmetic<int> : public true_type {};
102
- template <> struct is_arithmetic<unsigned int> : public true_type {};
103
- template <> struct is_arithmetic<long> : public true_type {};
104
- template <> struct is_arithmetic<unsigned long> : public true_type {};
105
- template <> struct is_arithmetic<long long> : public true_type {};
106
- template <> struct is_arithmetic<unsigned long long> : public true_type {};
107
- template <> struct is_arithmetic<float> : public true_type {};
108
- template <> struct is_arithmetic<double> : public true_type {};
109
-
110
- template<typename _Tp> struct is_floating_point : public false_type {};
111
- template<> struct is_floating_point<float> : public true_type {};
112
- template<> struct is_floating_point<double> : public true_type {};
113
- template<> struct is_floating_point<long double> : public true_type {};
114
-
115
- template <typename __T, typename __U> struct is_same : public false_type {};
116
- template <typename __T> struct is_same<__T, __T> : public true_type {};
117
-
118
- template<typename _Tp, bool = is_arithmetic<_Tp>::value>
119
- struct is_signed : public false_type {};
120
- template<typename _Tp>
121
- struct is_signed<_Tp, true> : public true_or_false_type<_Tp(-1) < _Tp(0)> {};
122
-
123
- template <class _T1, class _T2> struct is_convertible
124
- : public true_or_false_type<__is_convertible_to(_T1, _T2)> {};
125
-
126
- template<typename _CharT> struct char_traits;
127
- template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_istream;
128
- template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_ostream;
129
- typedef basic_istream<char> istream;
130
- typedef basic_ostream<char> ostream;
131
-
132
- template <typename __T> struct is_scalar : public integral_constant<bool, __is_scalar(__T)> {};
133
- } // Namespace std.
134
- #endif // defined(__HIPCC_RTC__)
135
-
136
- namespace hip_impl {
137
- inline
138
- constexpr
139
- unsigned int next_pot(unsigned int x) {
140
- // Precondition: x > 1.
141
- return 1u << (32u - __builtin_clz(x - 1u));
142
- }
143
- } // Namespace hip_impl.
144
-
145
- template<typename T, unsigned int n> struct HIP_vector_base;
146
-
147
- template<typename T>
148
- struct HIP_vector_base<T, 1> {
149
- using Native_vec_ = __NATIVE_VECTOR__(1, T);
150
-
151
- union {
152
- Native_vec_ data;
153
- struct {
154
- T x;
155
- };
156
- };
157
-
158
- using value_type = T;
159
-
160
- __HOST_DEVICE__
161
- HIP_vector_base() = default;
162
- __HOST_DEVICE__
163
- explicit
164
- constexpr
165
- HIP_vector_base(T x_) noexcept : data{x_} {}
166
- __HOST_DEVICE__
167
- constexpr
168
- HIP_vector_base(const HIP_vector_base&) = default;
169
- __HOST_DEVICE__
170
- constexpr
171
- HIP_vector_base(HIP_vector_base&&) = default;
172
- __HOST_DEVICE__
173
- ~HIP_vector_base() = default;
174
- __HOST_DEVICE__
175
- HIP_vector_base& operator=(const HIP_vector_base&) = default;
176
- };
177
-
178
- template<typename T>
179
- struct HIP_vector_base<T, 2> {
180
- using Native_vec_ = __NATIVE_VECTOR__(2, T);
181
-
182
- union
183
- #if !__has_attribute(ext_vector_type)
184
- alignas(hip_impl::next_pot(2 * sizeof(T)))
185
- #endif
186
- {
187
- Native_vec_ data;
188
- struct {
189
- T x;
190
- T y;
191
- };
192
- };
193
-
194
- using value_type = T;
195
-
196
- __HOST_DEVICE__
197
- HIP_vector_base() = default;
198
- __HOST_DEVICE__
199
- explicit
200
- constexpr
201
- HIP_vector_base(T x_) noexcept : data{x_, x_} {}
202
- __HOST_DEVICE__
203
- constexpr
204
- HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
205
- __HOST_DEVICE__
206
- constexpr
207
- HIP_vector_base(const HIP_vector_base&) = default;
208
- __HOST_DEVICE__
209
- constexpr
210
- HIP_vector_base(HIP_vector_base&&) = default;
211
- __HOST_DEVICE__
212
- ~HIP_vector_base() = default;
213
- __HOST_DEVICE__
214
- HIP_vector_base& operator=(const HIP_vector_base&) = default;
215
- };
216
-
217
- template<typename T>
218
- struct HIP_vector_base<T, 3> {
219
- struct Native_vec_ {
220
- T d[3];
221
-
222
- __HOST_DEVICE__
223
- Native_vec_() = default;
224
-
225
- __HOST_DEVICE__
226
- explicit
227
- constexpr
228
- Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
229
- __HOST_DEVICE__
230
- constexpr
231
- Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
232
- __HOST_DEVICE__
233
- constexpr
234
- Native_vec_(const Native_vec_&) = default;
235
- __HOST_DEVICE__
236
- constexpr
237
- Native_vec_(Native_vec_&&) = default;
238
- __HOST_DEVICE__
239
- ~Native_vec_() = default;
240
-
241
- __HOST_DEVICE__
242
- Native_vec_& operator=(const Native_vec_&) = default;
243
- __HOST_DEVICE__
244
- Native_vec_& operator=(Native_vec_&&) = default;
245
-
246
- __HOST_DEVICE__
247
- T& operator[](unsigned int idx) noexcept { return d[idx]; }
248
- __HOST_DEVICE__
249
- T operator[](unsigned int idx) const noexcept { return d[idx]; }
250
-
251
- __HOST_DEVICE__
252
- Native_vec_& operator+=(const Native_vec_& x_) noexcept
253
- {
254
- for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i];
255
- return *this;
256
- }
257
- __HOST_DEVICE__
258
- Native_vec_& operator-=(const Native_vec_& x_) noexcept
259
- {
260
- for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i];
261
- return *this;
262
- }
263
-
264
- __HOST_DEVICE__
265
- Native_vec_& operator*=(const Native_vec_& x_) noexcept
266
- {
267
- for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i];
268
- return *this;
269
- }
270
- __HOST_DEVICE__
271
- Native_vec_& operator/=(const Native_vec_& x_) noexcept
272
- {
273
- for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i];
274
- return *this;
275
- }
276
-
277
- template<
278
- typename U = T,
279
- typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
280
- __HOST_DEVICE__
281
- Native_vec_ operator-() const noexcept
282
- {
283
- auto r{*this};
284
- for (auto&& x : r.d) x = -x;
285
- return r;
286
- }
287
-
288
- template<
289
- typename U = T,
290
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
291
- __HOST_DEVICE__
292
- Native_vec_ operator~() const noexcept
293
- {
294
- auto r{*this};
295
- for (auto&& x : r.d) x = ~x;
296
- return r;
297
- }
298
- template<
299
- typename U = T,
300
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
301
- __HOST_DEVICE__
302
- Native_vec_& operator%=(const Native_vec_& x_) noexcept
303
- {
304
- for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i];
305
- return *this;
306
- }
307
- template<
308
- typename U = T,
309
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
310
- __HOST_DEVICE__
311
- Native_vec_& operator^=(const Native_vec_& x_) noexcept
312
- {
313
- for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i];
314
- return *this;
315
- }
316
- template<
317
- typename U = T,
318
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
319
- __HOST_DEVICE__
320
- Native_vec_& operator|=(const Native_vec_& x_) noexcept
321
- {
322
- for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i];
323
- return *this;
324
- }
325
- template<
326
- typename U = T,
327
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
328
- __HOST_DEVICE__
329
- Native_vec_& operator&=(const Native_vec_& x_) noexcept
330
- {
331
- for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i];
332
- return *this;
333
- }
334
- template<
335
- typename U = T,
336
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
337
- __HOST_DEVICE__
338
- Native_vec_& operator>>=(const Native_vec_& x_) noexcept
339
- {
340
- for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i];
341
- return *this;
342
- }
343
- template<
344
- typename U = T,
345
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
346
- __HOST_DEVICE__
347
- Native_vec_& operator<<=(const Native_vec_& x_) noexcept
348
- {
349
- for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i];
350
- return *this;
351
- }
352
- #if defined (__INTEL_COMPILER)
353
- typedef struct {
354
- int values[4];
355
- } _Vec3_cmp;
356
- using Vec3_cmp = _Vec3_cmp;
357
- #else
358
- using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
359
- #endif //INTEL
360
- __HOST_DEVICE__
361
- Vec3_cmp operator==(const Native_vec_& x_) const noexcept
362
- {
363
- return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]};
364
- }
365
- };
366
-
367
- union {
368
- Native_vec_ data;
369
- struct {
370
- T x;
371
- T y;
372
- T z;
373
- };
374
- };
375
-
376
- using value_type = T;
377
-
378
- __HOST_DEVICE__
379
- HIP_vector_base() = default;
380
- __HOST_DEVICE__
381
- explicit
382
- constexpr
383
- HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
384
- __HOST_DEVICE__
385
- constexpr
386
- HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
387
- __HOST_DEVICE__
388
- constexpr
389
- HIP_vector_base(const HIP_vector_base&) = default;
390
- __HOST_DEVICE__
391
- constexpr
392
- HIP_vector_base(HIP_vector_base&&) = default;
393
- __HOST_DEVICE__
394
- ~HIP_vector_base() = default;
395
-
396
- __HOST_DEVICE__
397
- HIP_vector_base& operator=(const HIP_vector_base&) = default;
398
- __HOST_DEVICE__
399
- HIP_vector_base& operator=(HIP_vector_base&&) = default;
400
- };
401
-
402
- template<typename T>
403
- struct HIP_vector_base<T, 4> {
404
- using Native_vec_ = __NATIVE_VECTOR__(4, T);
405
-
406
- union
407
- #if !__has_attribute(ext_vector_type)
408
- alignas(hip_impl::next_pot(4 * sizeof(T)))
409
- #endif
410
- {
411
- Native_vec_ data;
412
- struct {
413
- T x;
414
- T y;
415
- T z;
416
- T w;
417
- };
418
- };
419
-
420
- using value_type = T;
421
-
422
- __HOST_DEVICE__
423
- HIP_vector_base() = default;
424
- __HOST_DEVICE__
425
- explicit
426
- constexpr
427
- HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
428
- __HOST_DEVICE__
429
- constexpr
430
- HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
431
- __HOST_DEVICE__
432
- constexpr
433
- HIP_vector_base(const HIP_vector_base&) = default;
434
- __HOST_DEVICE__
435
- constexpr
436
- HIP_vector_base(HIP_vector_base&&) = default;
437
- __HOST_DEVICE__
438
- ~HIP_vector_base() = default;
439
- __HOST_DEVICE__
440
- HIP_vector_base& operator=(const HIP_vector_base&) = default;
441
- };
442
-
443
- template<typename T, unsigned int rank>
444
- struct HIP_vector_type : public HIP_vector_base<T, rank> {
445
- using HIP_vector_base<T, rank>::data;
446
- using typename HIP_vector_base<T, rank>::Native_vec_;
447
-
448
- __HOST_DEVICE__
449
- HIP_vector_type() = default;
450
- template<
451
- typename U,
452
- typename std::enable_if<
453
- std::is_convertible<U, T>::value>::type* = nullptr>
454
- __HOST_DEVICE__
455
- explicit
456
- constexpr
457
- HIP_vector_type(U x_) noexcept
458
- : HIP_vector_base<T, rank>{static_cast<T>(x_)}
459
- {}
460
- template< // TODO: constrain based on type as well.
461
- typename... Us,
462
- typename std::enable_if<
463
- (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
464
- __HOST_DEVICE__
465
- constexpr
466
- HIP_vector_type(Us... xs) noexcept
467
- : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
468
- {}
469
- __HOST_DEVICE__
470
- constexpr
471
- HIP_vector_type(const HIP_vector_type&) = default;
472
- __HOST_DEVICE__
473
- constexpr
474
- HIP_vector_type(HIP_vector_type&&) = default;
475
- __HOST_DEVICE__
476
- ~HIP_vector_type() = default;
477
-
478
- __HOST_DEVICE__
479
- HIP_vector_type& operator=(const HIP_vector_type&) = default;
480
- __HOST_DEVICE__
481
- HIP_vector_type& operator=(HIP_vector_type&&) = default;
482
-
483
- // Operators
484
- __HOST_DEVICE__
485
- HIP_vector_type& operator++() noexcept
486
- {
487
- return *this += HIP_vector_type{1};
488
- }
489
- __HOST_DEVICE__
490
- HIP_vector_type operator++(int) noexcept
491
- {
492
- auto tmp(*this);
493
- ++*this;
494
- return tmp;
495
- }
496
-
497
- __HOST_DEVICE__
498
- HIP_vector_type& operator--() noexcept
499
- {
500
- return *this -= HIP_vector_type{1};
501
- }
502
- __HOST_DEVICE__
503
- HIP_vector_type operator--(int) noexcept
504
- {
505
- auto tmp(*this);
506
- --*this;
507
- return tmp;
508
- }
509
-
510
- __HOST_DEVICE__
511
- HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
512
- {
513
- #if __HIP_USE_NATIVE_VECTOR__
514
- data += x.data;
515
- #else
516
- for (auto i = 0u; i != rank; ++i) data[i] += x.data[i];
517
- #endif
518
- return *this;
519
- }
520
- template<
521
- typename U,
522
- typename std::enable_if<
523
- std::is_convertible<U, T>{}>::type* = nullptr>
524
- __HOST_DEVICE__
525
- HIP_vector_type& operator+=(U x) noexcept
526
- {
527
- return *this += HIP_vector_type{x};
528
- }
529
-
530
- __HOST_DEVICE__
531
- HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
532
- {
533
- #if __HIP_USE_NATIVE_VECTOR__
534
- data -= x.data;
535
- #else
536
- for (auto i = 0u; i != rank; ++i) data[i] -= x.data[i];
537
- #endif
538
- return *this;
539
- }
540
- template<
541
- typename U,
542
- typename std::enable_if<
543
- std::is_convertible<U, T>{}>::type* = nullptr>
544
- __HOST_DEVICE__
545
- HIP_vector_type& operator-=(U x) noexcept
546
- {
547
- return *this -= HIP_vector_type{x};
548
- }
549
-
550
- __HOST_DEVICE__
551
- HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
552
- {
553
- #if __HIP_USE_NATIVE_VECTOR__
554
- data *= x.data;
555
- #else
556
- for (auto i = 0u; i != rank; ++i) data[i] *= x.data[i];
557
- #endif
558
- return *this;
559
- }
560
-
561
- friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
562
- HIP_vector_type x, const HIP_vector_type& y) noexcept
563
- {
564
- return HIP_vector_type{ x } *= y;
565
- }
566
-
567
- template<
568
- typename U,
569
- typename std::enable_if<
570
- std::is_convertible<U, T>{}>::type* = nullptr>
571
- __HOST_DEVICE__
572
- HIP_vector_type& operator*=(U x) noexcept
573
- {
574
- return *this *= HIP_vector_type{x};
575
- }
576
-
577
- friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
578
- HIP_vector_type x, const HIP_vector_type& y) noexcept
579
- {
580
- return HIP_vector_type{ x } /= y;
581
- }
582
-
583
- __HOST_DEVICE__
584
- HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
585
- {
586
- #if __HIP_USE_NATIVE_VECTOR__
587
- data /= x.data;
588
- #else
589
- for (auto i = 0u; i != rank; ++i) data[i] /= x.data[i];
590
- #endif
591
- return *this;
592
- }
593
- template<
594
- typename U,
595
- typename std::enable_if<
596
- std::is_convertible<U, T>{}>::type* = nullptr>
597
- __HOST_DEVICE__
598
- HIP_vector_type& operator/=(U x) noexcept
599
- {
600
- return *this /= HIP_vector_type{x};
601
- }
602
-
603
- template<
604
- typename U = T,
605
- typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
606
- __HOST_DEVICE__
607
- HIP_vector_type operator-() const noexcept
608
- {
609
- auto tmp(*this);
610
- #if __HIP_USE_NATIVE_VECTOR__
611
- tmp.data = -tmp.data;
612
- #else
613
- for (auto i = 0u; i != rank; ++i) tmp.data[i] = -tmp.data[i];
614
- #endif
615
- return tmp;
616
- }
617
-
618
- template<
619
- typename U = T,
620
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
621
- __HOST_DEVICE__
622
- HIP_vector_type operator~() const noexcept
623
- {
624
- HIP_vector_type r{*this};
625
- #if __HIP_USE_NATIVE_VECTOR__
626
- r.data = ~r.data;
627
- #else
628
- for (auto i = 0u; i != rank; ++i) r.data[i] = ~r.data[i];
629
- #endif
630
- return r;
631
- }
632
-
633
- template<
634
- typename U = T,
635
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
636
- __HOST_DEVICE__
637
- HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
638
- {
639
- #if __HIP_USE_NATIVE_VECTOR__
640
- data %= x.data;
641
- #else
642
- for (auto i = 0u; i != rank; ++i) data[i] %= x.data[i];
643
- #endif
644
- return *this;
645
- }
646
-
647
- template<
648
- typename U = T,
649
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
650
- __HOST_DEVICE__
651
- HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
652
- {
653
- #if __HIP_USE_NATIVE_VECTOR__
654
- data ^= x.data;
655
- #else
656
- for (auto i = 0u; i != rank; ++i) data[i] ^= x.data[i];
657
- #endif
658
- return *this;
659
- }
660
-
661
- template<
662
- typename U = T,
663
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
664
- __HOST_DEVICE__
665
- HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
666
- {
667
- #if __HIP_USE_NATIVE_VECTOR__
668
- data |= x.data;
669
- #else
670
- for (auto i = 0u; i != rank; ++i) data[i] |= x.data[i];
671
- #endif
672
- return *this;
673
- }
674
-
675
- template<
676
- typename U = T,
677
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
678
- __HOST_DEVICE__
679
- HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
680
- {
681
- #if __HIP_USE_NATIVE_VECTOR__
682
- data &= x.data;
683
- #else
684
- for (auto i = 0u; i != rank; ++i) data[i] &= x.data[i];
685
- #endif
686
- return *this;
687
- }
688
-
689
- template<
690
- typename U = T,
691
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
692
- __HOST_DEVICE__
693
- HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
694
- {
695
- #if __HIP_USE_NATIVE_VECTOR__
696
- data >>= x.data;
697
- #else
698
- for (auto i = 0u; i != rank; ++i) data[i] >>= x.data[i];
699
- #endif
700
- return *this;
701
- }
702
-
703
- template<
704
- typename U = T,
705
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
706
- __HOST_DEVICE__
707
- HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
708
- {
709
- #if __HIP_USE_NATIVE_VECTOR__
710
- data <<= x.data;
711
- #else
712
- for (auto i = 0u; i != rank; ++i) data[i] <<= x.data[i];
713
- #endif
714
- return *this;
715
- }
716
- };
717
-
718
- template<typename T, unsigned int n>
719
- __HOST_DEVICE__
720
- inline
721
- constexpr
722
- HIP_vector_type<T, n> operator+(
723
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
724
- {
725
- return HIP_vector_type<T, n>{x} += y;
726
- }
727
- template<typename T, unsigned int n, typename U>
728
- __HOST_DEVICE__
729
- inline
730
- constexpr
731
- HIP_vector_type<T, n> operator+(
732
- const HIP_vector_type<T, n>& x, U y) noexcept
733
- {
734
- return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
735
- }
736
- template<typename T, unsigned int n, typename U>
737
- __HOST_DEVICE__
738
- inline
739
- constexpr
740
- HIP_vector_type<T, n> operator+(
741
- U x, const HIP_vector_type<T, n>& y) noexcept
742
- {
743
- return HIP_vector_type<T, n>{x} += y;
744
- }
745
-
746
- template<typename T, unsigned int n>
747
- __HOST_DEVICE__
748
- inline
749
- constexpr
750
- HIP_vector_type<T, n> operator-(
751
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
752
- {
753
- return HIP_vector_type<T, n>{x} -= y;
754
- }
755
- template<typename T, unsigned int n, typename U>
756
- __HOST_DEVICE__
757
- inline
758
- constexpr
759
- HIP_vector_type<T, n> operator-(
760
- const HIP_vector_type<T, n>& x, U y) noexcept
761
- {
762
- return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
763
- }
764
- template<typename T, unsigned int n, typename U>
765
- __HOST_DEVICE__
766
- inline
767
- constexpr
768
- HIP_vector_type<T, n> operator-(
769
- U x, const HIP_vector_type<T, n>& y) noexcept
770
- {
771
- return HIP_vector_type<T, n>{x} -= y;
772
- }
773
-
774
- template<typename T, unsigned int n, typename U>
775
- __HOST_DEVICE__
776
- inline
777
- constexpr
778
- HIP_vector_type<T, n> operator*(
779
- const HIP_vector_type<T, n>& x, U y) noexcept
780
- {
781
- return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
782
- }
783
- template<typename T, unsigned int n, typename U>
784
- __HOST_DEVICE__
785
- inline
786
- constexpr
787
- HIP_vector_type<T, n> operator*(
788
- U x, const HIP_vector_type<T, n>& y) noexcept
789
- {
790
- return HIP_vector_type<T, n>{x} *= y;
791
- }
792
-
793
- template<typename T, unsigned int n, typename U>
794
- __HOST_DEVICE__
795
- inline
796
- constexpr
797
- HIP_vector_type<T, n> operator/(
798
- const HIP_vector_type<T, n>& x, U y) noexcept
799
- {
800
- return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
801
- }
802
- template<typename T, unsigned int n, typename U>
803
- __HOST_DEVICE__
804
- inline
805
- constexpr
806
- HIP_vector_type<T, n> operator/(
807
- U x, const HIP_vector_type<T, n>& y) noexcept
808
- {
809
- return HIP_vector_type<T, n>{x} /= y;
810
- }
811
-
812
- template<typename V>
813
- __HOST_DEVICE__
814
- inline
815
- constexpr
816
- bool _hip_compare(const V& x, const V& y, int n) noexcept
817
- {
818
- return
819
- (n == -1) ? true : ((x[n] != y[n]) ? false : _hip_compare(x, y, n - 1));
820
- }
821
-
822
- template<typename T, unsigned int n>
823
- __HOST_DEVICE__
824
- inline
825
- constexpr
826
- bool operator==(
827
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
828
- {
829
- return _hip_compare(x.data, y.data, n - 1);
830
- }
831
- template<typename T, unsigned int n, typename U>
832
- __HOST_DEVICE__
833
- inline
834
- constexpr
835
- bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
836
- {
837
- return x == HIP_vector_type<T, n>{y};
838
- }
839
- template<typename T, unsigned int n, typename U>
840
- __HOST_DEVICE__
841
- inline
842
- constexpr
843
- bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
844
- {
845
- return HIP_vector_type<T, n>{x} == y;
846
- }
847
-
848
- template<typename T, unsigned int n>
849
- __HOST_DEVICE__
850
- inline
851
- constexpr
852
- bool operator!=(
853
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
854
- {
855
- return !(x == y);
856
- }
857
- template<typename T, unsigned int n, typename U>
858
- __HOST_DEVICE__
859
- inline
860
- constexpr
861
- bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
862
- {
863
- return !(x == y);
864
- }
865
- template<typename T, unsigned int n, typename U>
866
- __HOST_DEVICE__
867
- inline
868
- constexpr
869
- bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
870
- {
871
- return !(x == y);
872
- }
873
-
874
- template<
875
- typename T,
876
- unsigned int n,
877
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
878
- __HOST_DEVICE__
879
- inline
880
- constexpr
881
- HIP_vector_type<T, n> operator%(
882
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
883
- {
884
- return HIP_vector_type<T, n>{x} %= y;
885
- }
886
- template<
887
- typename T,
888
- unsigned int n,
889
- typename U,
890
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
891
- __HOST_DEVICE__
892
- inline
893
- constexpr
894
- HIP_vector_type<T, n> operator%(
895
- const HIP_vector_type<T, n>& x, U y) noexcept
896
- {
897
- return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
898
- }
899
- template<
900
- typename T,
901
- unsigned int n,
902
- typename U,
903
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
904
- __HOST_DEVICE__
905
- inline
906
- constexpr
907
- HIP_vector_type<T, n> operator%(
908
- U x, const HIP_vector_type<T, n>& y) noexcept
909
- {
910
- return HIP_vector_type<T, n>{x} %= y;
911
- }
912
-
913
- template<
914
- typename T,
915
- unsigned int n,
916
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
917
- __HOST_DEVICE__
918
- inline
919
- constexpr
920
- HIP_vector_type<T, n> operator^(
921
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
922
- {
923
- return HIP_vector_type<T, n>{x} ^= y;
924
- }
925
- template<
926
- typename T,
927
- unsigned int n,
928
- typename U,
929
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
930
- __HOST_DEVICE__
931
- inline
932
- constexpr
933
- HIP_vector_type<T, n> operator^(
934
- const HIP_vector_type<T, n>& x, U y) noexcept
935
- {
936
- return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
937
- }
938
- template<
939
- typename T,
940
- unsigned int n,
941
- typename U,
942
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
943
- __HOST_DEVICE__
944
- inline
945
- constexpr
946
- HIP_vector_type<T, n> operator^(
947
- U x, const HIP_vector_type<T, n>& y) noexcept
948
- {
949
- return HIP_vector_type<T, n>{x} ^= y;
950
- }
951
-
952
- template<
953
- typename T,
954
- unsigned int n,
955
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
956
- __HOST_DEVICE__
957
- inline
958
- constexpr
959
- HIP_vector_type<T, n> operator|(
960
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
961
- {
962
- return HIP_vector_type<T, n>{x} |= y;
963
- }
964
- template<
965
- typename T,
966
- unsigned int n,
967
- typename U,
968
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
969
- __HOST_DEVICE__
970
- inline
971
- constexpr
972
- HIP_vector_type<T, n> operator|(
973
- const HIP_vector_type<T, n>& x, U y) noexcept
974
- {
975
- return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
976
- }
977
- template<
978
- typename T,
979
- unsigned int n,
980
- typename U,
981
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
982
- __HOST_DEVICE__
983
- inline
984
- constexpr
985
- HIP_vector_type<T, n> operator|(
986
- U x, const HIP_vector_type<T, n>& y) noexcept
987
- {
988
- return HIP_vector_type<T, n>{x} |= y;
989
- }
990
-
991
- template<
992
- typename T,
993
- unsigned int n,
994
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
995
- __HOST_DEVICE__
996
- inline
997
- constexpr
998
- HIP_vector_type<T, n> operator&(
999
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1000
- {
1001
- return HIP_vector_type<T, n>{x} &= y;
1002
- }
1003
- template<
1004
- typename T,
1005
- unsigned int n,
1006
- typename U,
1007
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1008
- __HOST_DEVICE__
1009
- inline
1010
- constexpr
1011
- HIP_vector_type<T, n> operator&(
1012
- const HIP_vector_type<T, n>& x, U y) noexcept
1013
- {
1014
- return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1015
- }
1016
- template<
1017
- typename T,
1018
- unsigned int n,
1019
- typename U,
1020
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1021
- __HOST_DEVICE__
1022
- inline
1023
- constexpr
1024
- HIP_vector_type<T, n> operator&(
1025
- U x, const HIP_vector_type<T, n>& y) noexcept
1026
- {
1027
- return HIP_vector_type<T, n>{x} &= y;
1028
- }
1029
-
1030
- template<
1031
- typename T,
1032
- unsigned int n,
1033
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1034
- __HOST_DEVICE__
1035
- inline
1036
- constexpr
1037
- HIP_vector_type<T, n> operator>>(
1038
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1039
- {
1040
- return HIP_vector_type<T, n>{x} >>= y;
1041
- }
1042
- template<
1043
- typename T,
1044
- unsigned int n,
1045
- typename U,
1046
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1047
- __HOST_DEVICE__
1048
- inline
1049
- constexpr
1050
- HIP_vector_type<T, n> operator>>(
1051
- const HIP_vector_type<T, n>& x, U y) noexcept
1052
- {
1053
- return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1054
- }
1055
- template<
1056
- typename T,
1057
- unsigned int n,
1058
- typename U,
1059
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1060
- __HOST_DEVICE__
1061
- inline
1062
- constexpr
1063
- HIP_vector_type<T, n> operator>>(
1064
- U x, const HIP_vector_type<T, n>& y) noexcept
1065
- {
1066
- return HIP_vector_type<T, n>{x} >>= y;
1067
- }
1068
-
1069
- template<
1070
- typename T,
1071
- unsigned int n,
1072
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1073
- __HOST_DEVICE__
1074
- inline
1075
- constexpr
1076
- HIP_vector_type<T, n> operator<<(
1077
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1078
- {
1079
- return HIP_vector_type<T, n>{x} <<= y;
1080
- }
1081
- template<
1082
- typename T,
1083
- unsigned int n,
1084
- typename U,
1085
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1086
- __HOST_DEVICE__
1087
- inline
1088
- constexpr
1089
- HIP_vector_type<T, n> operator<<(
1090
- const HIP_vector_type<T, n>& x, U y) noexcept
1091
- {
1092
- return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1093
- }
1094
- template<
1095
- typename T,
1096
- unsigned int n,
1097
- typename U,
1098
- typename std::enable_if<std::is_arithmetic<U>::value>::type,
1099
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1100
- __HOST_DEVICE__
1101
- inline
1102
- constexpr
1103
- HIP_vector_type<T, n> operator<<(
1104
- U x, const HIP_vector_type<T, n>& y) noexcept
1105
- {
1106
- return HIP_vector_type<T, n>{x} <<= y;
1107
- }
1108
-
1109
- /*
1110
- * Map HIP_vector_type<U, rankU> to HIP_vector_type<T, rankT>
1111
- */
1112
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1113
- __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 1 && rankU >= 1),
1114
- const HIP_vector_type<T, rankT>>::type
1115
- __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1116
- return HIP_vector_type<T, rankT>(static_cast<T>(u.x));
1117
- };
1118
-
1119
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1120
- __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU == 1),
1121
- const HIP_vector_type<T, rankT>>::type
1122
- __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1123
- return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0));
1124
- };
1125
-
1126
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1127
- __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU >= 2),
1128
- const HIP_vector_type<T, rankT>>::type
1129
- __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1130
- return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y));
1131
- };
1132
-
1133
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1134
- __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 1),
1135
- const HIP_vector_type<T, rankT>>::type
1136
- __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1137
- return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0),
1138
- static_cast<T>(0), static_cast<T>(0));
1139
- };
1140
-
1141
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1142
- __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 2),
1143
- const HIP_vector_type<T, rankT>>::type
1144
- __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1145
- return HIP_vector_type<T, rankT>(static_cast<T>(u.x), static_cast<T>(u.y),
1146
- static_cast<T>(0), static_cast<T>(0));
1147
- };
1148
-
1149
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1150
- __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 4),
1151
- const HIP_vector_type<T, rankT>>::type
1152
- __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1153
- return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y),
1154
- static_cast<T>(u.z), static_cast<T>(u.w));
1155
- };
1156
-
1157
- #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1158
- using CUDA_name##1 = HIP_vector_type<T, 1>;\
1159
- using CUDA_name##2 = HIP_vector_type<T, 2>;\
1160
- using CUDA_name##3 = HIP_vector_type<T, 3>;\
1161
- using CUDA_name##4 = HIP_vector_type<T, 4>;
1162
- #else
1163
- #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1164
- typedef struct {\
1165
- T x;\
1166
- } CUDA_name##1;\
1167
- typedef struct {\
1168
- T x;\
1169
- T y;\
1170
- } CUDA_name##2;\
1171
- typedef struct {\
1172
- T x;\
1173
- T y;\
1174
- T z;\
1175
- } CUDA_name##3;\
1176
- typedef struct {\
1177
- T x;\
1178
- T y;\
1179
- T z;\
1180
- T w;\
1181
- } CUDA_name##4;
1182
- #endif
1183
-
1184
- __MAKE_VECTOR_TYPE__(uchar, unsigned char);
1185
- __MAKE_VECTOR_TYPE__(char, char);
1186
- __MAKE_VECTOR_TYPE__(ushort, unsigned short);
1187
- __MAKE_VECTOR_TYPE__(short, short);
1188
- __MAKE_VECTOR_TYPE__(uint, unsigned int);
1189
- __MAKE_VECTOR_TYPE__(int, int);
1190
- __MAKE_VECTOR_TYPE__(ulong, unsigned long);
1191
- __MAKE_VECTOR_TYPE__(long, long);
1192
- __MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
1193
- __MAKE_VECTOR_TYPE__(longlong, long long);
1194
- __MAKE_VECTOR_TYPE__(float, float);
1195
- __MAKE_VECTOR_TYPE__(double, double);
1196
-
1197
- #else // !defined(__has_attribute)
1198
-
1199
- #if defined(_MSC_VER)
1200
- #include <mmintrin.h>
1201
- #include <xmmintrin.h>
1202
- #include <emmintrin.h>
1203
- #include <immintrin.h>
1204
-
1205
- /*
1206
- this is for compatibility with CUDA as CUDA allows accessing vector components
1207
- in C++ program with MSVC
1208
- */
1209
- typedef union {
1210
- struct {
1211
- char x;
1212
- };
1213
- char data;
1214
- } char1;
1215
- typedef union {
1216
- struct {
1217
- char x;
1218
- char y;
1219
- };
1220
- char data[2];
1221
- } char2;
1222
- typedef union {
1223
- struct {
1224
- char x;
1225
- char y;
1226
- char z;
1227
- char w;
1228
- };
1229
- char data[4];
1230
- } char4;
1231
- typedef union {
1232
- struct {
1233
- char x;
1234
- char y;
1235
- char z;
1236
- };
1237
- char data[3];
1238
- } char3;
1239
- typedef union {
1240
- __m64 data;
1241
- } char8;
1242
- typedef union {
1243
- __m128i data;
1244
- } char16;
1245
-
1246
- typedef union {
1247
- struct {
1248
- unsigned char x;
1249
- };
1250
- unsigned char data;
1251
- } uchar1;
1252
- typedef union {
1253
- struct {
1254
- unsigned char x;
1255
- unsigned char y;
1256
- };
1257
- unsigned char data[2];
1258
- } uchar2;
1259
- typedef union {
1260
- struct {
1261
- unsigned char x;
1262
- unsigned char y;
1263
- unsigned char z;
1264
- unsigned char w;
1265
- };
1266
- unsigned char data[4];
1267
- } uchar4;
1268
- typedef union {
1269
- struct {
1270
- unsigned char x;
1271
- unsigned char y;
1272
- unsigned char z;
1273
- };
1274
- unsigned char data[3];
1275
- } uchar3;
1276
- typedef union {
1277
- __m64 data;
1278
- } uchar8;
1279
- typedef union {
1280
- __m128i data;
1281
- } uchar16;
1282
-
1283
- typedef union {
1284
- struct {
1285
- short x;
1286
- };
1287
- short data;
1288
- } short1;
1289
- typedef union {
1290
- struct {
1291
- short x;
1292
- short y;
1293
- };
1294
- short data[2];
1295
- } short2;
1296
- typedef union {
1297
- struct {
1298
- short x;
1299
- short y;
1300
- short z;
1301
- short w;
1302
- };
1303
- __m64 data;
1304
- } short4;
1305
- typedef union {
1306
- struct {
1307
- short x;
1308
- short y;
1309
- short z;
1310
- };
1311
- short data[3];
1312
- } short3;
1313
- typedef union {
1314
- __m128i data;
1315
- } short8;
1316
- typedef union {
1317
- __m128i data[2];
1318
- } short16;
1319
-
1320
- typedef union {
1321
- struct {
1322
- unsigned short x;
1323
- };
1324
- unsigned short data;
1325
- } ushort1;
1326
- typedef union {
1327
- struct {
1328
- unsigned short x;
1329
- unsigned short y;
1330
- };
1331
- unsigned short data[2];
1332
- } ushort2;
1333
- typedef union {
1334
- struct {
1335
- unsigned short x;
1336
- unsigned short y;
1337
- unsigned short z;
1338
- unsigned short w;
1339
- };
1340
- __m64 data;
1341
- } ushort4;
1342
- typedef union {
1343
- struct {
1344
- unsigned short x;
1345
- unsigned short y;
1346
- unsigned short z;
1347
- };
1348
- unsigned short data[3];
1349
- } ushort3;
1350
- typedef union {
1351
- __m128i data;
1352
- } ushort8;
1353
- typedef union {
1354
- __m128i data[2];
1355
- } ushort16;
1356
-
1357
- typedef union {
1358
- struct {
1359
- int x;
1360
- };
1361
- int data;
1362
- } int1;
1363
- typedef union {
1364
- struct {
1365
- int x;
1366
- int y;
1367
- };
1368
- __m64 data;
1369
- } int2;
1370
- typedef union {
1371
- struct {
1372
- int x;
1373
- int y;
1374
- int z;
1375
- int w;
1376
- };
1377
- __m128i data;
1378
- } int4;
1379
- typedef union {
1380
- struct {
1381
- int x;
1382
- int y;
1383
- int z;
1384
- };
1385
- int data[3];
1386
- } int3;
1387
- typedef union {
1388
- __m128i data[2];
1389
- } int8;
1390
- typedef union {
1391
- __m128i data[4];
1392
- } int16;
1393
-
1394
- typedef union {
1395
- struct {
1396
- unsigned int x;
1397
- };
1398
- unsigned int data;
1399
- } uint1;
1400
- typedef union {
1401
- struct {
1402
- unsigned int x;
1403
- unsigned int y;
1404
- };
1405
- __m64 data;
1406
- } uint2;
1407
- typedef union {
1408
- struct {
1409
- unsigned int x;
1410
- unsigned int y;
1411
- unsigned int z;
1412
- unsigned int w;
1413
- };
1414
- __m128i data;
1415
- } uint4;
1416
- typedef union {
1417
- struct {
1418
- unsigned int x;
1419
- unsigned int y;
1420
- unsigned int z;
1421
- };
1422
- unsigned int data[3];
1423
- } uint3;
1424
- typedef union {
1425
- __m128i data[2];
1426
- } uint8;
1427
- typedef union {
1428
- __m128i data[4];
1429
- } uint16;
1430
-
1431
- typedef union {
1432
- struct {
1433
- int x;
1434
- };
1435
- int data;
1436
- } long1;
1437
- typedef union {
1438
- struct {
1439
- int x;
1440
- int y;
1441
- };
1442
- __m64 data;
1443
- } long2;
1444
- typedef union {
1445
- struct {
1446
- int x;
1447
- int y;
1448
- int z;
1449
- int w;
1450
- };
1451
- __m128i data;
1452
- } long4;
1453
- typedef union {
1454
- struct {
1455
- int x;
1456
- int y;
1457
- int z;
1458
- };
1459
- int data[3];
1460
- } long3;
1461
- typedef union {
1462
- __m128i data[2];
1463
- } long8;
1464
- typedef union {
1465
- __m128i data[4];
1466
- } long16;
1467
-
1468
- typedef union {
1469
- struct {
1470
- unsigned int x;
1471
- };
1472
- unsigned int data;
1473
- } ulong1;
1474
- typedef union {
1475
- struct {
1476
- unsigned int x;
1477
- unsigned int y;
1478
- };
1479
- __m64 data;
1480
- } ulong2;
1481
- typedef union {
1482
- struct {
1483
- unsigned int x;
1484
- unsigned int y;
1485
- unsigned int z;
1486
- unsigned int w;
1487
- };
1488
- __m128i data;
1489
- } ulong4;
1490
- typedef union {
1491
- struct {
1492
- unsigned int x;
1493
- unsigned int y;
1494
- unsigned int z;
1495
- };
1496
- unsigned int data[3];
1497
- } ulong3;
1498
- typedef union {
1499
- __m128i data[2];
1500
- } ulong8;
1501
- typedef union {
1502
- __m128i data[4];
1503
- } ulong16;
1504
-
1505
- typedef union {
1506
- struct {
1507
- long long x;
1508
- };
1509
- __m64 data;
1510
- } longlong1;
1511
- typedef union {
1512
- struct {
1513
- long long x;
1514
- long long y;
1515
- };
1516
- __m128i data;
1517
- } longlong2;
1518
- typedef union {
1519
- struct {
1520
- long long x;
1521
- long long y;
1522
- long long z;
1523
- long long w;
1524
- };
1525
- __m128i data[2];
1526
- } longlong4;
1527
- typedef union {
1528
- struct {
1529
- long long x;
1530
- long long y;
1531
- long long z;
1532
- };
1533
- __m64 data[3];
1534
- } longlong3;
1535
- typedef union {
1536
- __m128i data[4];
1537
- } longlong8;
1538
- typedef union {
1539
- __m128i data[8];
1540
- } longlong16;
1541
-
1542
- typedef union {
1543
- struct {
1544
- __m64 x;
1545
- };
1546
- __m64 data;
1547
- } ulonglong1;
1548
- typedef union {
1549
- struct {
1550
- __m64 x;
1551
- __m64 y;
1552
- };
1553
- __m128i data;
1554
- } ulonglong2;
1555
- typedef union {
1556
- struct {
1557
- __m64 x;
1558
- __m64 y;
1559
- __m64 z;
1560
- __m64 w;
1561
- };
1562
- __m128i data[2];
1563
- } ulonglong4;
1564
- typedef union {
1565
- struct {
1566
- __m64 x;
1567
- __m64 y;
1568
- __m64 z;
1569
- };
1570
- __m64 data[3];
1571
- } ulonglong3;
1572
- typedef union {
1573
- __m128i data[4];
1574
- } ulonglong8;
1575
- typedef union {
1576
- __m128i data[8];
1577
- } ulonglong16;
1578
-
1579
- typedef union {
1580
- struct {
1581
- float x;
1582
- };
1583
- float data;
1584
- } float1;
1585
- typedef union {
1586
- struct {
1587
- float x;
1588
- float y;
1589
- };
1590
- __m64 data;
1591
- } float2;
1592
- typedef union {
1593
- struct {
1594
- float x;
1595
- float y;
1596
- float z;
1597
- float w;
1598
- };
1599
- __m128 data;
1600
- } float4;
1601
- typedef union {
1602
- struct {
1603
- float x;
1604
- float y;
1605
- float z;
1606
- };
1607
- float data[3];
1608
- } float3;
1609
- typedef union {
1610
- __m256 data;
1611
- } float8;
1612
- typedef union {
1613
- __m256 data[2];
1614
- } float16;
1615
-
1616
- typedef union {
1617
- struct {
1618
- double x;
1619
- };
1620
- double data;
1621
- } double1;
1622
- typedef union {
1623
- struct {
1624
- double x;
1625
- double y;
1626
- };
1627
- __m128d data;
1628
- } double2;
1629
- typedef union {
1630
- struct {
1631
- double x;
1632
- double y;
1633
- double z;
1634
- double w;
1635
- };
1636
- __m256d data;
1637
- } double4;
1638
- typedef union {
1639
- struct {
1640
- double x;
1641
- double y;
1642
- double z;
1643
- };
1644
- double data[3];
1645
- } double3;
1646
- typedef union {
1647
- __m256d data[2];
1648
- } double8;
1649
- typedef union {
1650
- __m256d data[4];
1651
- } double16;
1652
-
1653
- #else // !defined(_MSC_VER)
1654
-
1655
- /*
1656
- this is for compatibility with CUDA as CUDA allows accessing vector components
1657
- in C++ program with MSVC
1658
- */
1659
- typedef union {
1660
- struct {
1661
- char x;
1662
- };
1663
- char data;
1664
- } char1;
1665
- typedef union {
1666
- struct {
1667
- char x;
1668
- char y;
1669
- };
1670
- char data[2];
1671
- } char2;
1672
- typedef union {
1673
- struct {
1674
- char x;
1675
- char y;
1676
- char z;
1677
- char w;
1678
- };
1679
- char data[4];
1680
- } char4;
1681
- typedef union {
1682
- char data[8];
1683
- } char8;
1684
- typedef union {
1685
- char data[16];
1686
- } char16;
1687
- typedef union {
1688
- struct {
1689
- char x;
1690
- char y;
1691
- char z;
1692
- };
1693
- char data[3];
1694
- } char3;
1695
-
1696
- typedef union {
1697
- struct {
1698
- unsigned char x;
1699
- };
1700
- unsigned char data;
1701
- } uchar1;
1702
- typedef union {
1703
- struct {
1704
- unsigned char x;
1705
- unsigned char y;
1706
- };
1707
- unsigned char data[2];
1708
- } uchar2;
1709
- typedef union {
1710
- struct {
1711
- unsigned char x;
1712
- unsigned char y;
1713
- unsigned char z;
1714
- unsigned char w;
1715
- };
1716
- unsigned char data[4];
1717
- } uchar4;
1718
- typedef union {
1719
- unsigned char data[8];
1720
- } uchar8;
1721
- typedef union {
1722
- unsigned char data[16];
1723
- } uchar16;
1724
- typedef union {
1725
- struct {
1726
- unsigned char x;
1727
- unsigned char y;
1728
- unsigned char z;
1729
- };
1730
- unsigned char data[3];
1731
- } uchar3;
1732
-
1733
- typedef union {
1734
- struct {
1735
- short x;
1736
- };
1737
- short data;
1738
- } short1;
1739
- typedef union {
1740
- struct {
1741
- short x;
1742
- short y;
1743
- };
1744
- short data[2];
1745
- } short2;
1746
- typedef union {
1747
- struct {
1748
- short x;
1749
- short y;
1750
- short z;
1751
- short w;
1752
- };
1753
- short data[4];
1754
- } short4;
1755
- typedef union {
1756
- short data[8];
1757
- } short8;
1758
- typedef union {
1759
- short data[16];
1760
- } short16;
1761
- typedef union {
1762
- struct {
1763
- short x;
1764
- short y;
1765
- short z;
1766
- };
1767
- short data[3];
1768
- } short3;
1769
-
1770
- typedef union {
1771
- struct {
1772
- unsigned short x;
1773
- };
1774
- unsigned short data;
1775
- } ushort1;
1776
- typedef union {
1777
- struct {
1778
- unsigned short x;
1779
- unsigned short y;
1780
- };
1781
- unsigned short data[2];
1782
- } ushort2;
1783
- typedef union {
1784
- struct {
1785
- unsigned short x;
1786
- unsigned short y;
1787
- unsigned short z;
1788
- unsigned short w;
1789
- };
1790
- unsigned short data[4];
1791
- } ushort4;
1792
- typedef union {
1793
- unsigned short data[8];
1794
- } ushort8;
1795
- typedef union {
1796
- unsigned short data[16];
1797
- } ushort16;
1798
- typedef union {
1799
- struct {
1800
- unsigned short x;
1801
- unsigned short y;
1802
- unsigned short z;
1803
- };
1804
- unsigned short data[3];
1805
- } ushort3;
1806
-
1807
- typedef union {
1808
- struct {
1809
- int x;
1810
- };
1811
- int data;
1812
- } int1;
1813
- typedef union {
1814
- struct {
1815
- int x;
1816
- int y;
1817
- };
1818
- int data[2];
1819
- } int2;
1820
- typedef union {
1821
- struct {
1822
- int x;
1823
- int y;
1824
- int z;
1825
- int w;
1826
- };
1827
- int data[4];
1828
- } int4;
1829
- typedef union {
1830
- int data[8];
1831
- } int8;
1832
- typedef union {
1833
- int data[16];
1834
- } int16;
1835
- typedef union {
1836
- struct {
1837
- int x;
1838
- int y;
1839
- int z;
1840
- };
1841
- int data[3];
1842
- } int3;
1843
-
1844
- typedef union {
1845
- struct {
1846
- unsigned int x;
1847
- };
1848
- unsigned int data;
1849
- } uint1;
1850
- typedef union {
1851
- struct {
1852
- unsigned int x;
1853
- unsigned int y;
1854
- };
1855
- unsigned int data[2];
1856
- } uint2;
1857
- typedef union {
1858
- struct {
1859
- unsigned int x;
1860
- unsigned int y;
1861
- unsigned int z;
1862
- unsigned int w;
1863
- };
1864
- unsigned int data[4];
1865
- } uint4;
1866
- typedef union {
1867
- unsigned int data[8];
1868
- } uint8;
1869
- typedef union {
1870
- unsigned int data[16];
1871
- } uint16;
1872
- typedef union {
1873
- struct {
1874
- unsigned int x;
1875
- unsigned int y;
1876
- unsigned int z;
1877
- };
1878
- unsigned int data[3];
1879
- } uint3;
1880
-
1881
- typedef union {
1882
- struct {
1883
- long x;
1884
- };
1885
- long data;
1886
- } long1;
1887
- typedef union {
1888
- struct {
1889
- long x;
1890
- long y;
1891
- };
1892
- long data[2];
1893
- } long2;
1894
- typedef union {
1895
- struct {
1896
- long x;
1897
- long y;
1898
- long z;
1899
- long w;
1900
- };
1901
- long data[4];
1902
- } long4;
1903
- typedef union {
1904
- long data[8];
1905
- } long8;
1906
- typedef union {
1907
- long data[16];
1908
- } long16;
1909
- typedef union {
1910
- struct {
1911
- long x;
1912
- long y;
1913
- long z;
1914
- };
1915
- long data[3];
1916
- } long3;
1917
-
1918
- typedef union {
1919
- struct {
1920
- unsigned long x;
1921
- };
1922
- unsigned long data;
1923
- } ulong1;
1924
- typedef union {
1925
- struct {
1926
- unsigned long x;
1927
- unsigned long y;
1928
- };
1929
- unsigned long data[2];
1930
- } ulong2;
1931
- typedef union {
1932
- struct {
1933
- unsigned long x;
1934
- unsigned long y;
1935
- unsigned long z;
1936
- unsigned long w;
1937
- };
1938
- unsigned long data[4];
1939
- } ulong4;
1940
- typedef union {
1941
- unsigned long data[8];
1942
- } ulong8;
1943
- typedef union {
1944
- unsigned long data[16];
1945
- } ulong16;
1946
- typedef union {
1947
- struct {
1948
- unsigned long x;
1949
- unsigned long y;
1950
- unsigned long z;
1951
- };
1952
- unsigned long data[3];
1953
- } ulong3;
1954
-
1955
- typedef union {
1956
- struct {
1957
- long long x;
1958
- };
1959
- long long data;
1960
- } longlong1;
1961
- typedef union {
1962
- struct {
1963
- long long x;
1964
- long long y;
1965
- };
1966
- long long data[2];
1967
- } longlong2;
1968
- typedef union {
1969
- struct {
1970
- long long x;
1971
- long long y;
1972
- long long z;
1973
- long long w;
1974
- };
1975
- long long data[4];
1976
- } longlong4;
1977
- typedef union {
1978
- long long data[8];
1979
- } longlong8;
1980
- typedef union {
1981
- long long data[16];
1982
- } longlong16;
1983
- typedef union {
1984
- struct {
1985
- long long x;
1986
- long long y;
1987
- long long z;
1988
- };
1989
- long long data[3];
1990
- } longlong3;
1991
-
1992
- typedef union {
1993
- struct {
1994
- unsigned long long x;
1995
- };
1996
- unsigned long long data;
1997
- } ulonglong1;
1998
- typedef union {
1999
- struct {
2000
- unsigned long long x;
2001
- unsigned long long y;
2002
- };
2003
- unsigned long long data[2];
2004
- } ulonglong2;
2005
- typedef union {
2006
- struct {
2007
- unsigned long long x;
2008
- unsigned long long y;
2009
- unsigned long long z;
2010
- unsigned long long w;
2011
- };
2012
- unsigned long long data[4];
2013
- } ulonglong4;
2014
- typedef union {
2015
- unsigned long long data[8];
2016
- } ulonglong8;
2017
- typedef union {
2018
- unsigned long long data[16];
2019
- } ulonglong16;
2020
- typedef union {
2021
- struct {
2022
- unsigned long long x;
2023
- unsigned long long y;
2024
- unsigned long long z;
2025
- };
2026
- unsigned long long data[3];
2027
- } ulonglong3;
2028
-
2029
- typedef union {
2030
- struct {
2031
- float x;
2032
- };
2033
- float data;
2034
- } float1;
2035
- typedef union {
2036
- struct {
2037
- float x;
2038
- float y;
2039
- };
2040
- float data[2];
2041
- } float2;
2042
- typedef union {
2043
- struct {
2044
- float x;
2045
- float y;
2046
- float z;
2047
- float w;
2048
- };
2049
- float data[4];
2050
- } float4;
2051
- typedef union {
2052
- float data[8];
2053
- } float8;
2054
- typedef union {
2055
- float data[16];
2056
- } float16;
2057
- typedef union {
2058
- struct {
2059
- float x;
2060
- float y;
2061
- float z;
2062
- };
2063
- float data[3];
2064
- } float3;
2065
-
2066
- typedef union {
2067
- struct {
2068
- double x;
2069
- };
2070
- double data;
2071
- } double1;
2072
- typedef union {
2073
- struct {
2074
- double x;
2075
- double y;
2076
- };
2077
- double data[2];
2078
- } double2;
2079
- typedef union {
2080
- struct {
2081
- double x;
2082
- double y;
2083
- double z;
2084
- double w;
2085
- };
2086
- double data[4];
2087
- } double4;
2088
- typedef union {
2089
- double data[8];
2090
- } double8;
2091
- typedef union {
2092
- double data[16];
2093
- } double16;
2094
- typedef union {
2095
- struct {
2096
- double x;
2097
- double y;
2098
- double z;
2099
- };
2100
- double data[3];
2101
- } double3;
2102
-
2103
- #endif // defined(_MSC_VER)
2104
- #endif // defined(__has_attribute)
2105
-
2106
- #ifdef __cplusplus
2107
- #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2108
- static inline __HOST_DEVICE__ type make_##type(comp x) { \
2109
- type r{x}; \
2110
- return r; \
2111
- }
2112
-
2113
- #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2114
- static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2115
- type r{x, y}; \
2116
- return r; \
2117
- }
2118
-
2119
- #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2120
- static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2121
- type r{x, y, z}; \
2122
- return r; \
2123
- }
2124
-
2125
- #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2126
- static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2127
- type r{x, y, z, w}; \
2128
- return r; \
2129
- }
2130
- #else
2131
- #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2132
- static inline __HOST_DEVICE__ type make_##type(comp x) { \
2133
- type r; \
2134
- r.x = x; \
2135
- return r; \
2136
- }
2137
-
2138
- #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2139
- static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2140
- type r; \
2141
- r.x = x; \
2142
- r.y = y; \
2143
- return r; \
2144
- }
2145
-
2146
- #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2147
- static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2148
- type r; \
2149
- r.x = x; \
2150
- r.y = y; \
2151
- r.z = z; \
2152
- return r; \
2153
- }
2154
-
2155
- #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2156
- static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2157
- type r; \
2158
- r.x = x; \
2159
- r.y = y; \
2160
- r.z = z; \
2161
- r.w = w; \
2162
- return r; \
2163
- }
2164
- #endif
2165
-
2166
- DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
2167
- DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
2168
- DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
2169
- DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
2170
-
2171
- DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
2172
- DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
2173
- DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
2174
- DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
2175
-
2176
- DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
2177
- DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
2178
- DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
2179
- DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
2180
-
2181
- DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
2182
- DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
2183
- DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
2184
- DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
2185
-
2186
- DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
2187
- DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
2188
- DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
2189
- DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
2190
-
2191
- DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
2192
- DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
2193
- DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
2194
- DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
2195
-
2196
- DECLOP_MAKE_ONE_COMPONENT(float, float1);
2197
- DECLOP_MAKE_TWO_COMPONENT(float, float2);
2198
- DECLOP_MAKE_THREE_COMPONENT(float, float3);
2199
- DECLOP_MAKE_FOUR_COMPONENT(float, float4);
2200
-
2201
- DECLOP_MAKE_ONE_COMPONENT(double, double1);
2202
- DECLOP_MAKE_TWO_COMPONENT(double, double2);
2203
- DECLOP_MAKE_THREE_COMPONENT(double, double3);
2204
- DECLOP_MAKE_FOUR_COMPONENT(double, double4);
2205
-
2206
- DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
2207
- DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
2208
- DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
2209
- DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
2210
-
2211
- DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
2212
- DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
2213
- DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
2214
- DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
2215
-
2216
- DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
2217
- DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
2218
- DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
2219
- DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
2220
-
2221
- DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
2222
- DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
2223
- DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
2224
- DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
2225
-
2226
- #endif