triton-windows 3.3.0.post19__cp39-cp39-win_amd64.whl → 3.3.1.post21__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +1 -1
- triton/backends/amd/driver.py +6 -1
- triton/backends/nvidia/compiler.py +1 -3
- triton/backends/nvidia/driver.py +7 -3
- triton/runtime/autotuner.py +2 -2
- triton/runtime/build.py +5 -5
- triton/runtime/tcc/lib/python310.def +1610 -0
- triton/runtime/tcc/lib/python311.def +1633 -0
- triton/runtime/tcc/lib/python312.def +1703 -0
- triton/runtime/tcc/lib/python313.def +1651 -0
- triton/runtime/tcc/lib/python313t.def +1656 -0
- triton/runtime/tcc/lib/python39.def +1644 -0
- triton/runtime/tcc/lib/python3t.def +905 -0
- triton/windows_utils.py +11 -4
- {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/METADATA +1 -1
- {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/RECORD +19 -109
- {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/WHEEL +1 -1
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/top_level.txt +0 -0
|
@@ -1,2226 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
|
3
|
-
|
|
4
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
6
|
-
in the Software without restriction, including without limitation the rights
|
|
7
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
8
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
9
|
-
furnished to do so, subject to the following conditions:
|
|
10
|
-
|
|
11
|
-
The above copyright notice and this permission notice shall be included in
|
|
12
|
-
all copies or substantial portions of the Software.
|
|
13
|
-
|
|
14
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
15
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
16
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
17
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
20
|
-
THE SOFTWARE.
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* @file amd_detail/hip_vector_types.h
|
|
25
|
-
* @brief Defines the different newt vector types for HIP runtime.
|
|
26
|
-
*/
|
|
27
|
-
|
|
28
|
-
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
|
|
29
|
-
#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
|
|
30
|
-
|
|
31
|
-
#include "hip/amd_detail/host_defines.h"
|
|
32
|
-
|
|
33
|
-
#if defined(__HIPCC_RTC__)
|
|
34
|
-
#define __HOST_DEVICE__ __device__
|
|
35
|
-
#else
|
|
36
|
-
#define __HOST_DEVICE__ __host__ __device__
|
|
37
|
-
#endif
|
|
38
|
-
|
|
39
|
-
#if defined(__has_attribute)
|
|
40
|
-
#if __has_attribute(ext_vector_type)
|
|
41
|
-
#define __HIP_USE_NATIVE_VECTOR__ 1
|
|
42
|
-
#define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
|
|
43
|
-
#else
|
|
44
|
-
#define __NATIVE_VECTOR__(n, T) T[n]
|
|
45
|
-
#endif
|
|
46
|
-
|
|
47
|
-
#if defined(__cplusplus)
|
|
48
|
-
#if !defined(__HIPCC_RTC__)
|
|
49
|
-
#include <array>
|
|
50
|
-
#include <iosfwd>
|
|
51
|
-
#include <type_traits>
|
|
52
|
-
#else
|
|
53
|
-
namespace std {
|
|
54
|
-
using ::size_t;
|
|
55
|
-
|
|
56
|
-
template <class _Tp, _Tp __v> struct integral_constant {
|
|
57
|
-
static constexpr const _Tp value = __v;
|
|
58
|
-
typedef _Tp value_type;
|
|
59
|
-
typedef integral_constant type;
|
|
60
|
-
constexpr operator value_type() const { return value; }
|
|
61
|
-
constexpr value_type operator()() const { return value; }
|
|
62
|
-
};
|
|
63
|
-
template <class _Tp, _Tp __v> constexpr const _Tp integral_constant<_Tp, __v>::value;
|
|
64
|
-
|
|
65
|
-
typedef integral_constant<bool, true> true_type;
|
|
66
|
-
typedef integral_constant<bool, false> false_type;
|
|
67
|
-
|
|
68
|
-
template <bool B> using bool_constant = integral_constant<bool, B>;
|
|
69
|
-
typedef bool_constant<true> true_type;
|
|
70
|
-
typedef bool_constant<false> false_type;
|
|
71
|
-
|
|
72
|
-
template <bool __B, class __T = void> struct enable_if {};
|
|
73
|
-
template <class __T> struct enable_if<true, __T> { typedef __T type; };
|
|
74
|
-
|
|
75
|
-
template<bool _B> struct true_or_false_type : public false_type {};
|
|
76
|
-
template<> struct true_or_false_type<true> : public true_type {};
|
|
77
|
-
|
|
78
|
-
template <class _Tp> struct is_integral : public false_type {};
|
|
79
|
-
template <> struct is_integral<bool> : public true_type {};
|
|
80
|
-
template <> struct is_integral<char> : public true_type {};
|
|
81
|
-
template <> struct is_integral<signed char> : public true_type {};
|
|
82
|
-
template <> struct is_integral<unsigned char> : public true_type {};
|
|
83
|
-
template <> struct is_integral<wchar_t> : public true_type {};
|
|
84
|
-
template <> struct is_integral<short> : public true_type {};
|
|
85
|
-
template <> struct is_integral<unsigned short> : public true_type {};
|
|
86
|
-
template <> struct is_integral<int> : public true_type {};
|
|
87
|
-
template <> struct is_integral<unsigned int> : public true_type {};
|
|
88
|
-
template <> struct is_integral<long> : public true_type {};
|
|
89
|
-
template <> struct is_integral<unsigned long> : public true_type {};
|
|
90
|
-
template <> struct is_integral<long long> : public true_type {};
|
|
91
|
-
template <> struct is_integral<unsigned long long> : public true_type {};
|
|
92
|
-
|
|
93
|
-
template <class _Tp> struct is_arithmetic : public false_type {};
|
|
94
|
-
template <> struct is_arithmetic<bool> : public true_type {};
|
|
95
|
-
template <> struct is_arithmetic<char> : public true_type {};
|
|
96
|
-
template <> struct is_arithmetic<signed char> : public true_type {};
|
|
97
|
-
template <> struct is_arithmetic<unsigned char> : public true_type {};
|
|
98
|
-
template <> struct is_arithmetic<wchar_t> : public true_type {};
|
|
99
|
-
template <> struct is_arithmetic<short> : public true_type {};
|
|
100
|
-
template <> struct is_arithmetic<unsigned short> : public true_type {};
|
|
101
|
-
template <> struct is_arithmetic<int> : public true_type {};
|
|
102
|
-
template <> struct is_arithmetic<unsigned int> : public true_type {};
|
|
103
|
-
template <> struct is_arithmetic<long> : public true_type {};
|
|
104
|
-
template <> struct is_arithmetic<unsigned long> : public true_type {};
|
|
105
|
-
template <> struct is_arithmetic<long long> : public true_type {};
|
|
106
|
-
template <> struct is_arithmetic<unsigned long long> : public true_type {};
|
|
107
|
-
template <> struct is_arithmetic<float> : public true_type {};
|
|
108
|
-
template <> struct is_arithmetic<double> : public true_type {};
|
|
109
|
-
|
|
110
|
-
template<typename _Tp> struct is_floating_point : public false_type {};
|
|
111
|
-
template<> struct is_floating_point<float> : public true_type {};
|
|
112
|
-
template<> struct is_floating_point<double> : public true_type {};
|
|
113
|
-
template<> struct is_floating_point<long double> : public true_type {};
|
|
114
|
-
|
|
115
|
-
template <typename __T, typename __U> struct is_same : public false_type {};
|
|
116
|
-
template <typename __T> struct is_same<__T, __T> : public true_type {};
|
|
117
|
-
|
|
118
|
-
template<typename _Tp, bool = is_arithmetic<_Tp>::value>
|
|
119
|
-
struct is_signed : public false_type {};
|
|
120
|
-
template<typename _Tp>
|
|
121
|
-
struct is_signed<_Tp, true> : public true_or_false_type<_Tp(-1) < _Tp(0)> {};
|
|
122
|
-
|
|
123
|
-
template <class _T1, class _T2> struct is_convertible
|
|
124
|
-
: public true_or_false_type<__is_convertible_to(_T1, _T2)> {};
|
|
125
|
-
|
|
126
|
-
template<typename _CharT> struct char_traits;
|
|
127
|
-
template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_istream;
|
|
128
|
-
template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_ostream;
|
|
129
|
-
typedef basic_istream<char> istream;
|
|
130
|
-
typedef basic_ostream<char> ostream;
|
|
131
|
-
|
|
132
|
-
template <typename __T> struct is_scalar : public integral_constant<bool, __is_scalar(__T)> {};
|
|
133
|
-
} // Namespace std.
|
|
134
|
-
#endif // defined(__HIPCC_RTC__)
|
|
135
|
-
|
|
136
|
-
namespace hip_impl {
|
|
137
|
-
inline
|
|
138
|
-
constexpr
|
|
139
|
-
unsigned int next_pot(unsigned int x) {
|
|
140
|
-
// Precondition: x > 1.
|
|
141
|
-
return 1u << (32u - __builtin_clz(x - 1u));
|
|
142
|
-
}
|
|
143
|
-
} // Namespace hip_impl.
|
|
144
|
-
|
|
145
|
-
template<typename T, unsigned int n> struct HIP_vector_base;
|
|
146
|
-
|
|
147
|
-
template<typename T>
|
|
148
|
-
struct HIP_vector_base<T, 1> {
|
|
149
|
-
using Native_vec_ = __NATIVE_VECTOR__(1, T);
|
|
150
|
-
|
|
151
|
-
union {
|
|
152
|
-
Native_vec_ data;
|
|
153
|
-
struct {
|
|
154
|
-
T x;
|
|
155
|
-
};
|
|
156
|
-
};
|
|
157
|
-
|
|
158
|
-
using value_type = T;
|
|
159
|
-
|
|
160
|
-
__HOST_DEVICE__
|
|
161
|
-
HIP_vector_base() = default;
|
|
162
|
-
__HOST_DEVICE__
|
|
163
|
-
explicit
|
|
164
|
-
constexpr
|
|
165
|
-
HIP_vector_base(T x_) noexcept : data{x_} {}
|
|
166
|
-
__HOST_DEVICE__
|
|
167
|
-
constexpr
|
|
168
|
-
HIP_vector_base(const HIP_vector_base&) = default;
|
|
169
|
-
__HOST_DEVICE__
|
|
170
|
-
constexpr
|
|
171
|
-
HIP_vector_base(HIP_vector_base&&) = default;
|
|
172
|
-
__HOST_DEVICE__
|
|
173
|
-
~HIP_vector_base() = default;
|
|
174
|
-
__HOST_DEVICE__
|
|
175
|
-
HIP_vector_base& operator=(const HIP_vector_base&) = default;
|
|
176
|
-
};
|
|
177
|
-
|
|
178
|
-
template<typename T>
|
|
179
|
-
struct HIP_vector_base<T, 2> {
|
|
180
|
-
using Native_vec_ = __NATIVE_VECTOR__(2, T);
|
|
181
|
-
|
|
182
|
-
union
|
|
183
|
-
#if !__has_attribute(ext_vector_type)
|
|
184
|
-
alignas(hip_impl::next_pot(2 * sizeof(T)))
|
|
185
|
-
#endif
|
|
186
|
-
{
|
|
187
|
-
Native_vec_ data;
|
|
188
|
-
struct {
|
|
189
|
-
T x;
|
|
190
|
-
T y;
|
|
191
|
-
};
|
|
192
|
-
};
|
|
193
|
-
|
|
194
|
-
using value_type = T;
|
|
195
|
-
|
|
196
|
-
__HOST_DEVICE__
|
|
197
|
-
HIP_vector_base() = default;
|
|
198
|
-
__HOST_DEVICE__
|
|
199
|
-
explicit
|
|
200
|
-
constexpr
|
|
201
|
-
HIP_vector_base(T x_) noexcept : data{x_, x_} {}
|
|
202
|
-
__HOST_DEVICE__
|
|
203
|
-
constexpr
|
|
204
|
-
HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
|
|
205
|
-
__HOST_DEVICE__
|
|
206
|
-
constexpr
|
|
207
|
-
HIP_vector_base(const HIP_vector_base&) = default;
|
|
208
|
-
__HOST_DEVICE__
|
|
209
|
-
constexpr
|
|
210
|
-
HIP_vector_base(HIP_vector_base&&) = default;
|
|
211
|
-
__HOST_DEVICE__
|
|
212
|
-
~HIP_vector_base() = default;
|
|
213
|
-
__HOST_DEVICE__
|
|
214
|
-
HIP_vector_base& operator=(const HIP_vector_base&) = default;
|
|
215
|
-
};
|
|
216
|
-
|
|
217
|
-
template<typename T>
|
|
218
|
-
struct HIP_vector_base<T, 3> {
|
|
219
|
-
struct Native_vec_ {
|
|
220
|
-
T d[3];
|
|
221
|
-
|
|
222
|
-
__HOST_DEVICE__
|
|
223
|
-
Native_vec_() = default;
|
|
224
|
-
|
|
225
|
-
__HOST_DEVICE__
|
|
226
|
-
explicit
|
|
227
|
-
constexpr
|
|
228
|
-
Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
|
|
229
|
-
__HOST_DEVICE__
|
|
230
|
-
constexpr
|
|
231
|
-
Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
|
|
232
|
-
__HOST_DEVICE__
|
|
233
|
-
constexpr
|
|
234
|
-
Native_vec_(const Native_vec_&) = default;
|
|
235
|
-
__HOST_DEVICE__
|
|
236
|
-
constexpr
|
|
237
|
-
Native_vec_(Native_vec_&&) = default;
|
|
238
|
-
__HOST_DEVICE__
|
|
239
|
-
~Native_vec_() = default;
|
|
240
|
-
|
|
241
|
-
__HOST_DEVICE__
|
|
242
|
-
Native_vec_& operator=(const Native_vec_&) = default;
|
|
243
|
-
__HOST_DEVICE__
|
|
244
|
-
Native_vec_& operator=(Native_vec_&&) = default;
|
|
245
|
-
|
|
246
|
-
__HOST_DEVICE__
|
|
247
|
-
T& operator[](unsigned int idx) noexcept { return d[idx]; }
|
|
248
|
-
__HOST_DEVICE__
|
|
249
|
-
T operator[](unsigned int idx) const noexcept { return d[idx]; }
|
|
250
|
-
|
|
251
|
-
__HOST_DEVICE__
|
|
252
|
-
Native_vec_& operator+=(const Native_vec_& x_) noexcept
|
|
253
|
-
{
|
|
254
|
-
for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i];
|
|
255
|
-
return *this;
|
|
256
|
-
}
|
|
257
|
-
__HOST_DEVICE__
|
|
258
|
-
Native_vec_& operator-=(const Native_vec_& x_) noexcept
|
|
259
|
-
{
|
|
260
|
-
for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i];
|
|
261
|
-
return *this;
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
__HOST_DEVICE__
|
|
265
|
-
Native_vec_& operator*=(const Native_vec_& x_) noexcept
|
|
266
|
-
{
|
|
267
|
-
for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i];
|
|
268
|
-
return *this;
|
|
269
|
-
}
|
|
270
|
-
__HOST_DEVICE__
|
|
271
|
-
Native_vec_& operator/=(const Native_vec_& x_) noexcept
|
|
272
|
-
{
|
|
273
|
-
for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i];
|
|
274
|
-
return *this;
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
template<
|
|
278
|
-
typename U = T,
|
|
279
|
-
typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
|
|
280
|
-
__HOST_DEVICE__
|
|
281
|
-
Native_vec_ operator-() const noexcept
|
|
282
|
-
{
|
|
283
|
-
auto r{*this};
|
|
284
|
-
for (auto&& x : r.d) x = -x;
|
|
285
|
-
return r;
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
template<
|
|
289
|
-
typename U = T,
|
|
290
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
291
|
-
__HOST_DEVICE__
|
|
292
|
-
Native_vec_ operator~() const noexcept
|
|
293
|
-
{
|
|
294
|
-
auto r{*this};
|
|
295
|
-
for (auto&& x : r.d) x = ~x;
|
|
296
|
-
return r;
|
|
297
|
-
}
|
|
298
|
-
template<
|
|
299
|
-
typename U = T,
|
|
300
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
301
|
-
__HOST_DEVICE__
|
|
302
|
-
Native_vec_& operator%=(const Native_vec_& x_) noexcept
|
|
303
|
-
{
|
|
304
|
-
for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i];
|
|
305
|
-
return *this;
|
|
306
|
-
}
|
|
307
|
-
template<
|
|
308
|
-
typename U = T,
|
|
309
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
310
|
-
__HOST_DEVICE__
|
|
311
|
-
Native_vec_& operator^=(const Native_vec_& x_) noexcept
|
|
312
|
-
{
|
|
313
|
-
for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i];
|
|
314
|
-
return *this;
|
|
315
|
-
}
|
|
316
|
-
template<
|
|
317
|
-
typename U = T,
|
|
318
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
319
|
-
__HOST_DEVICE__
|
|
320
|
-
Native_vec_& operator|=(const Native_vec_& x_) noexcept
|
|
321
|
-
{
|
|
322
|
-
for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i];
|
|
323
|
-
return *this;
|
|
324
|
-
}
|
|
325
|
-
template<
|
|
326
|
-
typename U = T,
|
|
327
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
328
|
-
__HOST_DEVICE__
|
|
329
|
-
Native_vec_& operator&=(const Native_vec_& x_) noexcept
|
|
330
|
-
{
|
|
331
|
-
for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i];
|
|
332
|
-
return *this;
|
|
333
|
-
}
|
|
334
|
-
template<
|
|
335
|
-
typename U = T,
|
|
336
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
337
|
-
__HOST_DEVICE__
|
|
338
|
-
Native_vec_& operator>>=(const Native_vec_& x_) noexcept
|
|
339
|
-
{
|
|
340
|
-
for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i];
|
|
341
|
-
return *this;
|
|
342
|
-
}
|
|
343
|
-
template<
|
|
344
|
-
typename U = T,
|
|
345
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
346
|
-
__HOST_DEVICE__
|
|
347
|
-
Native_vec_& operator<<=(const Native_vec_& x_) noexcept
|
|
348
|
-
{
|
|
349
|
-
for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i];
|
|
350
|
-
return *this;
|
|
351
|
-
}
|
|
352
|
-
#if defined (__INTEL_COMPILER)
|
|
353
|
-
typedef struct {
|
|
354
|
-
int values[4];
|
|
355
|
-
} _Vec3_cmp;
|
|
356
|
-
using Vec3_cmp = _Vec3_cmp;
|
|
357
|
-
#else
|
|
358
|
-
using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
|
|
359
|
-
#endif //INTEL
|
|
360
|
-
__HOST_DEVICE__
|
|
361
|
-
Vec3_cmp operator==(const Native_vec_& x_) const noexcept
|
|
362
|
-
{
|
|
363
|
-
return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]};
|
|
364
|
-
}
|
|
365
|
-
};
|
|
366
|
-
|
|
367
|
-
union {
|
|
368
|
-
Native_vec_ data;
|
|
369
|
-
struct {
|
|
370
|
-
T x;
|
|
371
|
-
T y;
|
|
372
|
-
T z;
|
|
373
|
-
};
|
|
374
|
-
};
|
|
375
|
-
|
|
376
|
-
using value_type = T;
|
|
377
|
-
|
|
378
|
-
__HOST_DEVICE__
|
|
379
|
-
HIP_vector_base() = default;
|
|
380
|
-
__HOST_DEVICE__
|
|
381
|
-
explicit
|
|
382
|
-
constexpr
|
|
383
|
-
HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
|
|
384
|
-
__HOST_DEVICE__
|
|
385
|
-
constexpr
|
|
386
|
-
HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
|
|
387
|
-
__HOST_DEVICE__
|
|
388
|
-
constexpr
|
|
389
|
-
HIP_vector_base(const HIP_vector_base&) = default;
|
|
390
|
-
__HOST_DEVICE__
|
|
391
|
-
constexpr
|
|
392
|
-
HIP_vector_base(HIP_vector_base&&) = default;
|
|
393
|
-
__HOST_DEVICE__
|
|
394
|
-
~HIP_vector_base() = default;
|
|
395
|
-
|
|
396
|
-
__HOST_DEVICE__
|
|
397
|
-
HIP_vector_base& operator=(const HIP_vector_base&) = default;
|
|
398
|
-
__HOST_DEVICE__
|
|
399
|
-
HIP_vector_base& operator=(HIP_vector_base&&) = default;
|
|
400
|
-
};
|
|
401
|
-
|
|
402
|
-
template<typename T>
|
|
403
|
-
struct HIP_vector_base<T, 4> {
|
|
404
|
-
using Native_vec_ = __NATIVE_VECTOR__(4, T);
|
|
405
|
-
|
|
406
|
-
union
|
|
407
|
-
#if !__has_attribute(ext_vector_type)
|
|
408
|
-
alignas(hip_impl::next_pot(4 * sizeof(T)))
|
|
409
|
-
#endif
|
|
410
|
-
{
|
|
411
|
-
Native_vec_ data;
|
|
412
|
-
struct {
|
|
413
|
-
T x;
|
|
414
|
-
T y;
|
|
415
|
-
T z;
|
|
416
|
-
T w;
|
|
417
|
-
};
|
|
418
|
-
};
|
|
419
|
-
|
|
420
|
-
using value_type = T;
|
|
421
|
-
|
|
422
|
-
__HOST_DEVICE__
|
|
423
|
-
HIP_vector_base() = default;
|
|
424
|
-
__HOST_DEVICE__
|
|
425
|
-
explicit
|
|
426
|
-
constexpr
|
|
427
|
-
HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
|
|
428
|
-
__HOST_DEVICE__
|
|
429
|
-
constexpr
|
|
430
|
-
HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
|
|
431
|
-
__HOST_DEVICE__
|
|
432
|
-
constexpr
|
|
433
|
-
HIP_vector_base(const HIP_vector_base&) = default;
|
|
434
|
-
__HOST_DEVICE__
|
|
435
|
-
constexpr
|
|
436
|
-
HIP_vector_base(HIP_vector_base&&) = default;
|
|
437
|
-
__HOST_DEVICE__
|
|
438
|
-
~HIP_vector_base() = default;
|
|
439
|
-
__HOST_DEVICE__
|
|
440
|
-
HIP_vector_base& operator=(const HIP_vector_base&) = default;
|
|
441
|
-
};
|
|
442
|
-
|
|
443
|
-
template<typename T, unsigned int rank>
|
|
444
|
-
struct HIP_vector_type : public HIP_vector_base<T, rank> {
|
|
445
|
-
using HIP_vector_base<T, rank>::data;
|
|
446
|
-
using typename HIP_vector_base<T, rank>::Native_vec_;
|
|
447
|
-
|
|
448
|
-
__HOST_DEVICE__
|
|
449
|
-
HIP_vector_type() = default;
|
|
450
|
-
template<
|
|
451
|
-
typename U,
|
|
452
|
-
typename std::enable_if<
|
|
453
|
-
std::is_convertible<U, T>::value>::type* = nullptr>
|
|
454
|
-
__HOST_DEVICE__
|
|
455
|
-
explicit
|
|
456
|
-
constexpr
|
|
457
|
-
HIP_vector_type(U x_) noexcept
|
|
458
|
-
: HIP_vector_base<T, rank>{static_cast<T>(x_)}
|
|
459
|
-
{}
|
|
460
|
-
template< // TODO: constrain based on type as well.
|
|
461
|
-
typename... Us,
|
|
462
|
-
typename std::enable_if<
|
|
463
|
-
(rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
|
|
464
|
-
__HOST_DEVICE__
|
|
465
|
-
constexpr
|
|
466
|
-
HIP_vector_type(Us... xs) noexcept
|
|
467
|
-
: HIP_vector_base<T, rank>{static_cast<T>(xs)...}
|
|
468
|
-
{}
|
|
469
|
-
__HOST_DEVICE__
|
|
470
|
-
constexpr
|
|
471
|
-
HIP_vector_type(const HIP_vector_type&) = default;
|
|
472
|
-
__HOST_DEVICE__
|
|
473
|
-
constexpr
|
|
474
|
-
HIP_vector_type(HIP_vector_type&&) = default;
|
|
475
|
-
__HOST_DEVICE__
|
|
476
|
-
~HIP_vector_type() = default;
|
|
477
|
-
|
|
478
|
-
__HOST_DEVICE__
|
|
479
|
-
HIP_vector_type& operator=(const HIP_vector_type&) = default;
|
|
480
|
-
__HOST_DEVICE__
|
|
481
|
-
HIP_vector_type& operator=(HIP_vector_type&&) = default;
|
|
482
|
-
|
|
483
|
-
// Operators
|
|
484
|
-
__HOST_DEVICE__
|
|
485
|
-
HIP_vector_type& operator++() noexcept
|
|
486
|
-
{
|
|
487
|
-
return *this += HIP_vector_type{1};
|
|
488
|
-
}
|
|
489
|
-
__HOST_DEVICE__
|
|
490
|
-
HIP_vector_type operator++(int) noexcept
|
|
491
|
-
{
|
|
492
|
-
auto tmp(*this);
|
|
493
|
-
++*this;
|
|
494
|
-
return tmp;
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
__HOST_DEVICE__
|
|
498
|
-
HIP_vector_type& operator--() noexcept
|
|
499
|
-
{
|
|
500
|
-
return *this -= HIP_vector_type{1};
|
|
501
|
-
}
|
|
502
|
-
__HOST_DEVICE__
|
|
503
|
-
HIP_vector_type operator--(int) noexcept
|
|
504
|
-
{
|
|
505
|
-
auto tmp(*this);
|
|
506
|
-
--*this;
|
|
507
|
-
return tmp;
|
|
508
|
-
}
|
|
509
|
-
|
|
510
|
-
__HOST_DEVICE__
|
|
511
|
-
HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
|
|
512
|
-
{
|
|
513
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
514
|
-
data += x.data;
|
|
515
|
-
#else
|
|
516
|
-
for (auto i = 0u; i != rank; ++i) data[i] += x.data[i];
|
|
517
|
-
#endif
|
|
518
|
-
return *this;
|
|
519
|
-
}
|
|
520
|
-
template<
|
|
521
|
-
typename U,
|
|
522
|
-
typename std::enable_if<
|
|
523
|
-
std::is_convertible<U, T>{}>::type* = nullptr>
|
|
524
|
-
__HOST_DEVICE__
|
|
525
|
-
HIP_vector_type& operator+=(U x) noexcept
|
|
526
|
-
{
|
|
527
|
-
return *this += HIP_vector_type{x};
|
|
528
|
-
}
|
|
529
|
-
|
|
530
|
-
__HOST_DEVICE__
|
|
531
|
-
HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
|
|
532
|
-
{
|
|
533
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
534
|
-
data -= x.data;
|
|
535
|
-
#else
|
|
536
|
-
for (auto i = 0u; i != rank; ++i) data[i] -= x.data[i];
|
|
537
|
-
#endif
|
|
538
|
-
return *this;
|
|
539
|
-
}
|
|
540
|
-
template<
|
|
541
|
-
typename U,
|
|
542
|
-
typename std::enable_if<
|
|
543
|
-
std::is_convertible<U, T>{}>::type* = nullptr>
|
|
544
|
-
__HOST_DEVICE__
|
|
545
|
-
HIP_vector_type& operator-=(U x) noexcept
|
|
546
|
-
{
|
|
547
|
-
return *this -= HIP_vector_type{x};
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
__HOST_DEVICE__
|
|
551
|
-
HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
|
|
552
|
-
{
|
|
553
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
554
|
-
data *= x.data;
|
|
555
|
-
#else
|
|
556
|
-
for (auto i = 0u; i != rank; ++i) data[i] *= x.data[i];
|
|
557
|
-
#endif
|
|
558
|
-
return *this;
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
|
|
562
|
-
HIP_vector_type x, const HIP_vector_type& y) noexcept
|
|
563
|
-
{
|
|
564
|
-
return HIP_vector_type{ x } *= y;
|
|
565
|
-
}
|
|
566
|
-
|
|
567
|
-
template<
|
|
568
|
-
typename U,
|
|
569
|
-
typename std::enable_if<
|
|
570
|
-
std::is_convertible<U, T>{}>::type* = nullptr>
|
|
571
|
-
__HOST_DEVICE__
|
|
572
|
-
HIP_vector_type& operator*=(U x) noexcept
|
|
573
|
-
{
|
|
574
|
-
return *this *= HIP_vector_type{x};
|
|
575
|
-
}
|
|
576
|
-
|
|
577
|
-
friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
|
|
578
|
-
HIP_vector_type x, const HIP_vector_type& y) noexcept
|
|
579
|
-
{
|
|
580
|
-
return HIP_vector_type{ x } /= y;
|
|
581
|
-
}
|
|
582
|
-
|
|
583
|
-
__HOST_DEVICE__
|
|
584
|
-
HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
|
|
585
|
-
{
|
|
586
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
587
|
-
data /= x.data;
|
|
588
|
-
#else
|
|
589
|
-
for (auto i = 0u; i != rank; ++i) data[i] /= x.data[i];
|
|
590
|
-
#endif
|
|
591
|
-
return *this;
|
|
592
|
-
}
|
|
593
|
-
template<
|
|
594
|
-
typename U,
|
|
595
|
-
typename std::enable_if<
|
|
596
|
-
std::is_convertible<U, T>{}>::type* = nullptr>
|
|
597
|
-
__HOST_DEVICE__
|
|
598
|
-
HIP_vector_type& operator/=(U x) noexcept
|
|
599
|
-
{
|
|
600
|
-
return *this /= HIP_vector_type{x};
|
|
601
|
-
}
|
|
602
|
-
|
|
603
|
-
template<
|
|
604
|
-
typename U = T,
|
|
605
|
-
typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
|
|
606
|
-
__HOST_DEVICE__
|
|
607
|
-
HIP_vector_type operator-() const noexcept
|
|
608
|
-
{
|
|
609
|
-
auto tmp(*this);
|
|
610
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
611
|
-
tmp.data = -tmp.data;
|
|
612
|
-
#else
|
|
613
|
-
for (auto i = 0u; i != rank; ++i) tmp.data[i] = -tmp.data[i];
|
|
614
|
-
#endif
|
|
615
|
-
return tmp;
|
|
616
|
-
}
|
|
617
|
-
|
|
618
|
-
template<
|
|
619
|
-
typename U = T,
|
|
620
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
621
|
-
__HOST_DEVICE__
|
|
622
|
-
HIP_vector_type operator~() const noexcept
|
|
623
|
-
{
|
|
624
|
-
HIP_vector_type r{*this};
|
|
625
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
626
|
-
r.data = ~r.data;
|
|
627
|
-
#else
|
|
628
|
-
for (auto i = 0u; i != rank; ++i) r.data[i] = ~r.data[i];
|
|
629
|
-
#endif
|
|
630
|
-
return r;
|
|
631
|
-
}
|
|
632
|
-
|
|
633
|
-
template<
|
|
634
|
-
typename U = T,
|
|
635
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
636
|
-
__HOST_DEVICE__
|
|
637
|
-
HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
|
|
638
|
-
{
|
|
639
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
640
|
-
data %= x.data;
|
|
641
|
-
#else
|
|
642
|
-
for (auto i = 0u; i != rank; ++i) data[i] %= x.data[i];
|
|
643
|
-
#endif
|
|
644
|
-
return *this;
|
|
645
|
-
}
|
|
646
|
-
|
|
647
|
-
template<
|
|
648
|
-
typename U = T,
|
|
649
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
650
|
-
__HOST_DEVICE__
|
|
651
|
-
HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
|
|
652
|
-
{
|
|
653
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
654
|
-
data ^= x.data;
|
|
655
|
-
#else
|
|
656
|
-
for (auto i = 0u; i != rank; ++i) data[i] ^= x.data[i];
|
|
657
|
-
#endif
|
|
658
|
-
return *this;
|
|
659
|
-
}
|
|
660
|
-
|
|
661
|
-
template<
|
|
662
|
-
typename U = T,
|
|
663
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
664
|
-
__HOST_DEVICE__
|
|
665
|
-
HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
|
|
666
|
-
{
|
|
667
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
668
|
-
data |= x.data;
|
|
669
|
-
#else
|
|
670
|
-
for (auto i = 0u; i != rank; ++i) data[i] |= x.data[i];
|
|
671
|
-
#endif
|
|
672
|
-
return *this;
|
|
673
|
-
}
|
|
674
|
-
|
|
675
|
-
template<
|
|
676
|
-
typename U = T,
|
|
677
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
678
|
-
__HOST_DEVICE__
|
|
679
|
-
HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
|
|
680
|
-
{
|
|
681
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
682
|
-
data &= x.data;
|
|
683
|
-
#else
|
|
684
|
-
for (auto i = 0u; i != rank; ++i) data[i] &= x.data[i];
|
|
685
|
-
#endif
|
|
686
|
-
return *this;
|
|
687
|
-
}
|
|
688
|
-
|
|
689
|
-
template<
|
|
690
|
-
typename U = T,
|
|
691
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
692
|
-
__HOST_DEVICE__
|
|
693
|
-
HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
|
|
694
|
-
{
|
|
695
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
696
|
-
data >>= x.data;
|
|
697
|
-
#else
|
|
698
|
-
for (auto i = 0u; i != rank; ++i) data[i] >>= x.data[i];
|
|
699
|
-
#endif
|
|
700
|
-
return *this;
|
|
701
|
-
}
|
|
702
|
-
|
|
703
|
-
template<
|
|
704
|
-
typename U = T,
|
|
705
|
-
typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
|
|
706
|
-
__HOST_DEVICE__
|
|
707
|
-
HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
|
|
708
|
-
{
|
|
709
|
-
#if __HIP_USE_NATIVE_VECTOR__
|
|
710
|
-
data <<= x.data;
|
|
711
|
-
#else
|
|
712
|
-
for (auto i = 0u; i != rank; ++i) data[i] <<= x.data[i];
|
|
713
|
-
#endif
|
|
714
|
-
return *this;
|
|
715
|
-
}
|
|
716
|
-
};
|
|
717
|
-
|
|
718
|
-
template<typename T, unsigned int n>
|
|
719
|
-
__HOST_DEVICE__
|
|
720
|
-
inline
|
|
721
|
-
constexpr
|
|
722
|
-
HIP_vector_type<T, n> operator+(
|
|
723
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
724
|
-
{
|
|
725
|
-
return HIP_vector_type<T, n>{x} += y;
|
|
726
|
-
}
|
|
727
|
-
template<typename T, unsigned int n, typename U>
|
|
728
|
-
__HOST_DEVICE__
|
|
729
|
-
inline
|
|
730
|
-
constexpr
|
|
731
|
-
HIP_vector_type<T, n> operator+(
|
|
732
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
733
|
-
{
|
|
734
|
-
return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
|
|
735
|
-
}
|
|
736
|
-
template<typename T, unsigned int n, typename U>
|
|
737
|
-
__HOST_DEVICE__
|
|
738
|
-
inline
|
|
739
|
-
constexpr
|
|
740
|
-
HIP_vector_type<T, n> operator+(
|
|
741
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
742
|
-
{
|
|
743
|
-
return HIP_vector_type<T, n>{x} += y;
|
|
744
|
-
}
|
|
745
|
-
|
|
746
|
-
template<typename T, unsigned int n>
|
|
747
|
-
__HOST_DEVICE__
|
|
748
|
-
inline
|
|
749
|
-
constexpr
|
|
750
|
-
HIP_vector_type<T, n> operator-(
|
|
751
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
752
|
-
{
|
|
753
|
-
return HIP_vector_type<T, n>{x} -= y;
|
|
754
|
-
}
|
|
755
|
-
template<typename T, unsigned int n, typename U>
|
|
756
|
-
__HOST_DEVICE__
|
|
757
|
-
inline
|
|
758
|
-
constexpr
|
|
759
|
-
HIP_vector_type<T, n> operator-(
|
|
760
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
761
|
-
{
|
|
762
|
-
return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
|
|
763
|
-
}
|
|
764
|
-
template<typename T, unsigned int n, typename U>
|
|
765
|
-
__HOST_DEVICE__
|
|
766
|
-
inline
|
|
767
|
-
constexpr
|
|
768
|
-
HIP_vector_type<T, n> operator-(
|
|
769
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
770
|
-
{
|
|
771
|
-
return HIP_vector_type<T, n>{x} -= y;
|
|
772
|
-
}
|
|
773
|
-
|
|
774
|
-
template<typename T, unsigned int n, typename U>
|
|
775
|
-
__HOST_DEVICE__
|
|
776
|
-
inline
|
|
777
|
-
constexpr
|
|
778
|
-
HIP_vector_type<T, n> operator*(
|
|
779
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
780
|
-
{
|
|
781
|
-
return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
|
|
782
|
-
}
|
|
783
|
-
template<typename T, unsigned int n, typename U>
|
|
784
|
-
__HOST_DEVICE__
|
|
785
|
-
inline
|
|
786
|
-
constexpr
|
|
787
|
-
HIP_vector_type<T, n> operator*(
|
|
788
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
789
|
-
{
|
|
790
|
-
return HIP_vector_type<T, n>{x} *= y;
|
|
791
|
-
}
|
|
792
|
-
|
|
793
|
-
template<typename T, unsigned int n, typename U>
|
|
794
|
-
__HOST_DEVICE__
|
|
795
|
-
inline
|
|
796
|
-
constexpr
|
|
797
|
-
HIP_vector_type<T, n> operator/(
|
|
798
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
799
|
-
{
|
|
800
|
-
return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
|
|
801
|
-
}
|
|
802
|
-
template<typename T, unsigned int n, typename U>
|
|
803
|
-
__HOST_DEVICE__
|
|
804
|
-
inline
|
|
805
|
-
constexpr
|
|
806
|
-
HIP_vector_type<T, n> operator/(
|
|
807
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
808
|
-
{
|
|
809
|
-
return HIP_vector_type<T, n>{x} /= y;
|
|
810
|
-
}
|
|
811
|
-
|
|
812
|
-
template<typename V>
|
|
813
|
-
__HOST_DEVICE__
|
|
814
|
-
inline
|
|
815
|
-
constexpr
|
|
816
|
-
bool _hip_compare(const V& x, const V& y, int n) noexcept
|
|
817
|
-
{
|
|
818
|
-
return
|
|
819
|
-
(n == -1) ? true : ((x[n] != y[n]) ? false : _hip_compare(x, y, n - 1));
|
|
820
|
-
}
|
|
821
|
-
|
|
822
|
-
template<typename T, unsigned int n>
|
|
823
|
-
__HOST_DEVICE__
|
|
824
|
-
inline
|
|
825
|
-
constexpr
|
|
826
|
-
bool operator==(
|
|
827
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
828
|
-
{
|
|
829
|
-
return _hip_compare(x.data, y.data, n - 1);
|
|
830
|
-
}
|
|
831
|
-
template<typename T, unsigned int n, typename U>
|
|
832
|
-
__HOST_DEVICE__
|
|
833
|
-
inline
|
|
834
|
-
constexpr
|
|
835
|
-
bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
|
|
836
|
-
{
|
|
837
|
-
return x == HIP_vector_type<T, n>{y};
|
|
838
|
-
}
|
|
839
|
-
template<typename T, unsigned int n, typename U>
|
|
840
|
-
__HOST_DEVICE__
|
|
841
|
-
inline
|
|
842
|
-
constexpr
|
|
843
|
-
bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
|
|
844
|
-
{
|
|
845
|
-
return HIP_vector_type<T, n>{x} == y;
|
|
846
|
-
}
|
|
847
|
-
|
|
848
|
-
template<typename T, unsigned int n>
|
|
849
|
-
__HOST_DEVICE__
|
|
850
|
-
inline
|
|
851
|
-
constexpr
|
|
852
|
-
bool operator!=(
|
|
853
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
854
|
-
{
|
|
855
|
-
return !(x == y);
|
|
856
|
-
}
|
|
857
|
-
template<typename T, unsigned int n, typename U>
|
|
858
|
-
__HOST_DEVICE__
|
|
859
|
-
inline
|
|
860
|
-
constexpr
|
|
861
|
-
bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
|
|
862
|
-
{
|
|
863
|
-
return !(x == y);
|
|
864
|
-
}
|
|
865
|
-
template<typename T, unsigned int n, typename U>
|
|
866
|
-
__HOST_DEVICE__
|
|
867
|
-
inline
|
|
868
|
-
constexpr
|
|
869
|
-
bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
|
|
870
|
-
{
|
|
871
|
-
return !(x == y);
|
|
872
|
-
}
|
|
873
|
-
|
|
874
|
-
template<
|
|
875
|
-
typename T,
|
|
876
|
-
unsigned int n,
|
|
877
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
878
|
-
__HOST_DEVICE__
|
|
879
|
-
inline
|
|
880
|
-
constexpr
|
|
881
|
-
HIP_vector_type<T, n> operator%(
|
|
882
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
883
|
-
{
|
|
884
|
-
return HIP_vector_type<T, n>{x} %= y;
|
|
885
|
-
}
|
|
886
|
-
template<
|
|
887
|
-
typename T,
|
|
888
|
-
unsigned int n,
|
|
889
|
-
typename U,
|
|
890
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
891
|
-
__HOST_DEVICE__
|
|
892
|
-
inline
|
|
893
|
-
constexpr
|
|
894
|
-
HIP_vector_type<T, n> operator%(
|
|
895
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
896
|
-
{
|
|
897
|
-
return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
|
|
898
|
-
}
|
|
899
|
-
template<
|
|
900
|
-
typename T,
|
|
901
|
-
unsigned int n,
|
|
902
|
-
typename U,
|
|
903
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
904
|
-
__HOST_DEVICE__
|
|
905
|
-
inline
|
|
906
|
-
constexpr
|
|
907
|
-
HIP_vector_type<T, n> operator%(
|
|
908
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
909
|
-
{
|
|
910
|
-
return HIP_vector_type<T, n>{x} %= y;
|
|
911
|
-
}
|
|
912
|
-
|
|
913
|
-
template<
|
|
914
|
-
typename T,
|
|
915
|
-
unsigned int n,
|
|
916
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
917
|
-
__HOST_DEVICE__
|
|
918
|
-
inline
|
|
919
|
-
constexpr
|
|
920
|
-
HIP_vector_type<T, n> operator^(
|
|
921
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
922
|
-
{
|
|
923
|
-
return HIP_vector_type<T, n>{x} ^= y;
|
|
924
|
-
}
|
|
925
|
-
template<
|
|
926
|
-
typename T,
|
|
927
|
-
unsigned int n,
|
|
928
|
-
typename U,
|
|
929
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
930
|
-
__HOST_DEVICE__
|
|
931
|
-
inline
|
|
932
|
-
constexpr
|
|
933
|
-
HIP_vector_type<T, n> operator^(
|
|
934
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
935
|
-
{
|
|
936
|
-
return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
|
|
937
|
-
}
|
|
938
|
-
template<
|
|
939
|
-
typename T,
|
|
940
|
-
unsigned int n,
|
|
941
|
-
typename U,
|
|
942
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
943
|
-
__HOST_DEVICE__
|
|
944
|
-
inline
|
|
945
|
-
constexpr
|
|
946
|
-
HIP_vector_type<T, n> operator^(
|
|
947
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
948
|
-
{
|
|
949
|
-
return HIP_vector_type<T, n>{x} ^= y;
|
|
950
|
-
}
|
|
951
|
-
|
|
952
|
-
template<
|
|
953
|
-
typename T,
|
|
954
|
-
unsigned int n,
|
|
955
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
956
|
-
__HOST_DEVICE__
|
|
957
|
-
inline
|
|
958
|
-
constexpr
|
|
959
|
-
HIP_vector_type<T, n> operator|(
|
|
960
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
961
|
-
{
|
|
962
|
-
return HIP_vector_type<T, n>{x} |= y;
|
|
963
|
-
}
|
|
964
|
-
template<
|
|
965
|
-
typename T,
|
|
966
|
-
unsigned int n,
|
|
967
|
-
typename U,
|
|
968
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
969
|
-
__HOST_DEVICE__
|
|
970
|
-
inline
|
|
971
|
-
constexpr
|
|
972
|
-
HIP_vector_type<T, n> operator|(
|
|
973
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
974
|
-
{
|
|
975
|
-
return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
|
|
976
|
-
}
|
|
977
|
-
template<
|
|
978
|
-
typename T,
|
|
979
|
-
unsigned int n,
|
|
980
|
-
typename U,
|
|
981
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
982
|
-
__HOST_DEVICE__
|
|
983
|
-
inline
|
|
984
|
-
constexpr
|
|
985
|
-
HIP_vector_type<T, n> operator|(
|
|
986
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
987
|
-
{
|
|
988
|
-
return HIP_vector_type<T, n>{x} |= y;
|
|
989
|
-
}
|
|
990
|
-
|
|
991
|
-
template<
|
|
992
|
-
typename T,
|
|
993
|
-
unsigned int n,
|
|
994
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
995
|
-
__HOST_DEVICE__
|
|
996
|
-
inline
|
|
997
|
-
constexpr
|
|
998
|
-
HIP_vector_type<T, n> operator&(
|
|
999
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
1000
|
-
{
|
|
1001
|
-
return HIP_vector_type<T, n>{x} &= y;
|
|
1002
|
-
}
|
|
1003
|
-
template<
|
|
1004
|
-
typename T,
|
|
1005
|
-
unsigned int n,
|
|
1006
|
-
typename U,
|
|
1007
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
1008
|
-
__HOST_DEVICE__
|
|
1009
|
-
inline
|
|
1010
|
-
constexpr
|
|
1011
|
-
HIP_vector_type<T, n> operator&(
|
|
1012
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
1013
|
-
{
|
|
1014
|
-
return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
|
|
1015
|
-
}
|
|
1016
|
-
template<
|
|
1017
|
-
typename T,
|
|
1018
|
-
unsigned int n,
|
|
1019
|
-
typename U,
|
|
1020
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
1021
|
-
__HOST_DEVICE__
|
|
1022
|
-
inline
|
|
1023
|
-
constexpr
|
|
1024
|
-
HIP_vector_type<T, n> operator&(
|
|
1025
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
1026
|
-
{
|
|
1027
|
-
return HIP_vector_type<T, n>{x} &= y;
|
|
1028
|
-
}
|
|
1029
|
-
|
|
1030
|
-
template<
|
|
1031
|
-
typename T,
|
|
1032
|
-
unsigned int n,
|
|
1033
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
1034
|
-
__HOST_DEVICE__
|
|
1035
|
-
inline
|
|
1036
|
-
constexpr
|
|
1037
|
-
HIP_vector_type<T, n> operator>>(
|
|
1038
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
1039
|
-
{
|
|
1040
|
-
return HIP_vector_type<T, n>{x} >>= y;
|
|
1041
|
-
}
|
|
1042
|
-
template<
|
|
1043
|
-
typename T,
|
|
1044
|
-
unsigned int n,
|
|
1045
|
-
typename U,
|
|
1046
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
1047
|
-
__HOST_DEVICE__
|
|
1048
|
-
inline
|
|
1049
|
-
constexpr
|
|
1050
|
-
HIP_vector_type<T, n> operator>>(
|
|
1051
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
1052
|
-
{
|
|
1053
|
-
return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
|
|
1054
|
-
}
|
|
1055
|
-
template<
|
|
1056
|
-
typename T,
|
|
1057
|
-
unsigned int n,
|
|
1058
|
-
typename U,
|
|
1059
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
1060
|
-
__HOST_DEVICE__
|
|
1061
|
-
inline
|
|
1062
|
-
constexpr
|
|
1063
|
-
HIP_vector_type<T, n> operator>>(
|
|
1064
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
1065
|
-
{
|
|
1066
|
-
return HIP_vector_type<T, n>{x} >>= y;
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
template<
|
|
1070
|
-
typename T,
|
|
1071
|
-
unsigned int n,
|
|
1072
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
1073
|
-
__HOST_DEVICE__
|
|
1074
|
-
inline
|
|
1075
|
-
constexpr
|
|
1076
|
-
HIP_vector_type<T, n> operator<<(
|
|
1077
|
-
const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
|
|
1078
|
-
{
|
|
1079
|
-
return HIP_vector_type<T, n>{x} <<= y;
|
|
1080
|
-
}
|
|
1081
|
-
template<
|
|
1082
|
-
typename T,
|
|
1083
|
-
unsigned int n,
|
|
1084
|
-
typename U,
|
|
1085
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
1086
|
-
__HOST_DEVICE__
|
|
1087
|
-
inline
|
|
1088
|
-
constexpr
|
|
1089
|
-
HIP_vector_type<T, n> operator<<(
|
|
1090
|
-
const HIP_vector_type<T, n>& x, U y) noexcept
|
|
1091
|
-
{
|
|
1092
|
-
return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
|
|
1093
|
-
}
|
|
1094
|
-
template<
|
|
1095
|
-
typename T,
|
|
1096
|
-
unsigned int n,
|
|
1097
|
-
typename U,
|
|
1098
|
-
typename std::enable_if<std::is_arithmetic<U>::value>::type,
|
|
1099
|
-
typename std::enable_if<std::is_integral<T>{}>* = nullptr>
|
|
1100
|
-
__HOST_DEVICE__
|
|
1101
|
-
inline
|
|
1102
|
-
constexpr
|
|
1103
|
-
HIP_vector_type<T, n> operator<<(
|
|
1104
|
-
U x, const HIP_vector_type<T, n>& y) noexcept
|
|
1105
|
-
{
|
|
1106
|
-
return HIP_vector_type<T, n>{x} <<= y;
|
|
1107
|
-
}
|
|
1108
|
-
|
|
1109
|
-
/*
|
|
1110
|
-
* Map HIP_vector_type<U, rankU> to HIP_vector_type<T, rankT>
|
|
1111
|
-
*/
|
|
1112
|
-
template <typename T, unsigned int rankT, typename U, unsigned int rankU>
|
|
1113
|
-
__forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 1 && rankU >= 1),
|
|
1114
|
-
const HIP_vector_type<T, rankT>>::type
|
|
1115
|
-
__hipMapVector(const HIP_vector_type<U, rankU>& u) {
|
|
1116
|
-
return HIP_vector_type<T, rankT>(static_cast<T>(u.x));
|
|
1117
|
-
};
|
|
1118
|
-
|
|
1119
|
-
template <typename T, unsigned int rankT, typename U, unsigned int rankU>
|
|
1120
|
-
__forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU == 1),
|
|
1121
|
-
const HIP_vector_type<T, rankT>>::type
|
|
1122
|
-
__hipMapVector(const HIP_vector_type<U, rankU>& u) {
|
|
1123
|
-
return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0));
|
|
1124
|
-
};
|
|
1125
|
-
|
|
1126
|
-
template <typename T, unsigned int rankT, typename U, unsigned int rankU>
|
|
1127
|
-
__forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU >= 2),
|
|
1128
|
-
const HIP_vector_type<T, rankT>>::type
|
|
1129
|
-
__hipMapVector(const HIP_vector_type<U, rankU>& u) {
|
|
1130
|
-
return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y));
|
|
1131
|
-
};
|
|
1132
|
-
|
|
1133
|
-
template <typename T, unsigned int rankT, typename U, unsigned int rankU>
|
|
1134
|
-
__forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 1),
|
|
1135
|
-
const HIP_vector_type<T, rankT>>::type
|
|
1136
|
-
__hipMapVector(const HIP_vector_type<U, rankU>& u) {
|
|
1137
|
-
return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0),
|
|
1138
|
-
static_cast<T>(0), static_cast<T>(0));
|
|
1139
|
-
};
|
|
1140
|
-
|
|
1141
|
-
template <typename T, unsigned int rankT, typename U, unsigned int rankU>
|
|
1142
|
-
__forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 2),
|
|
1143
|
-
const HIP_vector_type<T, rankT>>::type
|
|
1144
|
-
__hipMapVector(const HIP_vector_type<U, rankU>& u) {
|
|
1145
|
-
return HIP_vector_type<T, rankT>(static_cast<T>(u.x), static_cast<T>(u.y),
|
|
1146
|
-
static_cast<T>(0), static_cast<T>(0));
|
|
1147
|
-
};
|
|
1148
|
-
|
|
1149
|
-
template <typename T, unsigned int rankT, typename U, unsigned int rankU>
|
|
1150
|
-
__forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 4),
|
|
1151
|
-
const HIP_vector_type<T, rankT>>::type
|
|
1152
|
-
__hipMapVector(const HIP_vector_type<U, rankU>& u) {
|
|
1153
|
-
return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y),
|
|
1154
|
-
static_cast<T>(u.z), static_cast<T>(u.w));
|
|
1155
|
-
};
|
|
1156
|
-
|
|
1157
|
-
#define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
|
|
1158
|
-
using CUDA_name##1 = HIP_vector_type<T, 1>;\
|
|
1159
|
-
using CUDA_name##2 = HIP_vector_type<T, 2>;\
|
|
1160
|
-
using CUDA_name##3 = HIP_vector_type<T, 3>;\
|
|
1161
|
-
using CUDA_name##4 = HIP_vector_type<T, 4>;
|
|
1162
|
-
#else
|
|
1163
|
-
#define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
|
|
1164
|
-
typedef struct {\
|
|
1165
|
-
T x;\
|
|
1166
|
-
} CUDA_name##1;\
|
|
1167
|
-
typedef struct {\
|
|
1168
|
-
T x;\
|
|
1169
|
-
T y;\
|
|
1170
|
-
} CUDA_name##2;\
|
|
1171
|
-
typedef struct {\
|
|
1172
|
-
T x;\
|
|
1173
|
-
T y;\
|
|
1174
|
-
T z;\
|
|
1175
|
-
} CUDA_name##3;\
|
|
1176
|
-
typedef struct {\
|
|
1177
|
-
T x;\
|
|
1178
|
-
T y;\
|
|
1179
|
-
T z;\
|
|
1180
|
-
T w;\
|
|
1181
|
-
} CUDA_name##4;
|
|
1182
|
-
#endif
|
|
1183
|
-
|
|
1184
|
-
__MAKE_VECTOR_TYPE__(uchar, unsigned char);
|
|
1185
|
-
__MAKE_VECTOR_TYPE__(char, char);
|
|
1186
|
-
__MAKE_VECTOR_TYPE__(ushort, unsigned short);
|
|
1187
|
-
__MAKE_VECTOR_TYPE__(short, short);
|
|
1188
|
-
__MAKE_VECTOR_TYPE__(uint, unsigned int);
|
|
1189
|
-
__MAKE_VECTOR_TYPE__(int, int);
|
|
1190
|
-
__MAKE_VECTOR_TYPE__(ulong, unsigned long);
|
|
1191
|
-
__MAKE_VECTOR_TYPE__(long, long);
|
|
1192
|
-
__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
|
|
1193
|
-
__MAKE_VECTOR_TYPE__(longlong, long long);
|
|
1194
|
-
__MAKE_VECTOR_TYPE__(float, float);
|
|
1195
|
-
__MAKE_VECTOR_TYPE__(double, double);
|
|
1196
|
-
|
|
1197
|
-
#else // !defined(__has_attribute)
|
|
1198
|
-
|
|
1199
|
-
#if defined(_MSC_VER)
|
|
1200
|
-
#include <mmintrin.h>
|
|
1201
|
-
#include <xmmintrin.h>
|
|
1202
|
-
#include <emmintrin.h>
|
|
1203
|
-
#include <immintrin.h>
|
|
1204
|
-
|
|
1205
|
-
/*
|
|
1206
|
-
this is for compatibility with CUDA as CUDA allows accessing vector components
|
|
1207
|
-
in C++ program with MSVC
|
|
1208
|
-
*/
|
|
1209
|
-
typedef union {
|
|
1210
|
-
struct {
|
|
1211
|
-
char x;
|
|
1212
|
-
};
|
|
1213
|
-
char data;
|
|
1214
|
-
} char1;
|
|
1215
|
-
typedef union {
|
|
1216
|
-
struct {
|
|
1217
|
-
char x;
|
|
1218
|
-
char y;
|
|
1219
|
-
};
|
|
1220
|
-
char data[2];
|
|
1221
|
-
} char2;
|
|
1222
|
-
typedef union {
|
|
1223
|
-
struct {
|
|
1224
|
-
char x;
|
|
1225
|
-
char y;
|
|
1226
|
-
char z;
|
|
1227
|
-
char w;
|
|
1228
|
-
};
|
|
1229
|
-
char data[4];
|
|
1230
|
-
} char4;
|
|
1231
|
-
typedef union {
|
|
1232
|
-
struct {
|
|
1233
|
-
char x;
|
|
1234
|
-
char y;
|
|
1235
|
-
char z;
|
|
1236
|
-
};
|
|
1237
|
-
char data[3];
|
|
1238
|
-
} char3;
|
|
1239
|
-
typedef union {
|
|
1240
|
-
__m64 data;
|
|
1241
|
-
} char8;
|
|
1242
|
-
typedef union {
|
|
1243
|
-
__m128i data;
|
|
1244
|
-
} char16;
|
|
1245
|
-
|
|
1246
|
-
typedef union {
|
|
1247
|
-
struct {
|
|
1248
|
-
unsigned char x;
|
|
1249
|
-
};
|
|
1250
|
-
unsigned char data;
|
|
1251
|
-
} uchar1;
|
|
1252
|
-
typedef union {
|
|
1253
|
-
struct {
|
|
1254
|
-
unsigned char x;
|
|
1255
|
-
unsigned char y;
|
|
1256
|
-
};
|
|
1257
|
-
unsigned char data[2];
|
|
1258
|
-
} uchar2;
|
|
1259
|
-
typedef union {
|
|
1260
|
-
struct {
|
|
1261
|
-
unsigned char x;
|
|
1262
|
-
unsigned char y;
|
|
1263
|
-
unsigned char z;
|
|
1264
|
-
unsigned char w;
|
|
1265
|
-
};
|
|
1266
|
-
unsigned char data[4];
|
|
1267
|
-
} uchar4;
|
|
1268
|
-
typedef union {
|
|
1269
|
-
struct {
|
|
1270
|
-
unsigned char x;
|
|
1271
|
-
unsigned char y;
|
|
1272
|
-
unsigned char z;
|
|
1273
|
-
};
|
|
1274
|
-
unsigned char data[3];
|
|
1275
|
-
} uchar3;
|
|
1276
|
-
typedef union {
|
|
1277
|
-
__m64 data;
|
|
1278
|
-
} uchar8;
|
|
1279
|
-
typedef union {
|
|
1280
|
-
__m128i data;
|
|
1281
|
-
} uchar16;
|
|
1282
|
-
|
|
1283
|
-
typedef union {
|
|
1284
|
-
struct {
|
|
1285
|
-
short x;
|
|
1286
|
-
};
|
|
1287
|
-
short data;
|
|
1288
|
-
} short1;
|
|
1289
|
-
typedef union {
|
|
1290
|
-
struct {
|
|
1291
|
-
short x;
|
|
1292
|
-
short y;
|
|
1293
|
-
};
|
|
1294
|
-
short data[2];
|
|
1295
|
-
} short2;
|
|
1296
|
-
typedef union {
|
|
1297
|
-
struct {
|
|
1298
|
-
short x;
|
|
1299
|
-
short y;
|
|
1300
|
-
short z;
|
|
1301
|
-
short w;
|
|
1302
|
-
};
|
|
1303
|
-
__m64 data;
|
|
1304
|
-
} short4;
|
|
1305
|
-
typedef union {
|
|
1306
|
-
struct {
|
|
1307
|
-
short x;
|
|
1308
|
-
short y;
|
|
1309
|
-
short z;
|
|
1310
|
-
};
|
|
1311
|
-
short data[3];
|
|
1312
|
-
} short3;
|
|
1313
|
-
typedef union {
|
|
1314
|
-
__m128i data;
|
|
1315
|
-
} short8;
|
|
1316
|
-
typedef union {
|
|
1317
|
-
__m128i data[2];
|
|
1318
|
-
} short16;
|
|
1319
|
-
|
|
1320
|
-
typedef union {
|
|
1321
|
-
struct {
|
|
1322
|
-
unsigned short x;
|
|
1323
|
-
};
|
|
1324
|
-
unsigned short data;
|
|
1325
|
-
} ushort1;
|
|
1326
|
-
typedef union {
|
|
1327
|
-
struct {
|
|
1328
|
-
unsigned short x;
|
|
1329
|
-
unsigned short y;
|
|
1330
|
-
};
|
|
1331
|
-
unsigned short data[2];
|
|
1332
|
-
} ushort2;
|
|
1333
|
-
typedef union {
|
|
1334
|
-
struct {
|
|
1335
|
-
unsigned short x;
|
|
1336
|
-
unsigned short y;
|
|
1337
|
-
unsigned short z;
|
|
1338
|
-
unsigned short w;
|
|
1339
|
-
};
|
|
1340
|
-
__m64 data;
|
|
1341
|
-
} ushort4;
|
|
1342
|
-
typedef union {
|
|
1343
|
-
struct {
|
|
1344
|
-
unsigned short x;
|
|
1345
|
-
unsigned short y;
|
|
1346
|
-
unsigned short z;
|
|
1347
|
-
};
|
|
1348
|
-
unsigned short data[3];
|
|
1349
|
-
} ushort3;
|
|
1350
|
-
typedef union {
|
|
1351
|
-
__m128i data;
|
|
1352
|
-
} ushort8;
|
|
1353
|
-
typedef union {
|
|
1354
|
-
__m128i data[2];
|
|
1355
|
-
} ushort16;
|
|
1356
|
-
|
|
1357
|
-
typedef union {
|
|
1358
|
-
struct {
|
|
1359
|
-
int x;
|
|
1360
|
-
};
|
|
1361
|
-
int data;
|
|
1362
|
-
} int1;
|
|
1363
|
-
typedef union {
|
|
1364
|
-
struct {
|
|
1365
|
-
int x;
|
|
1366
|
-
int y;
|
|
1367
|
-
};
|
|
1368
|
-
__m64 data;
|
|
1369
|
-
} int2;
|
|
1370
|
-
typedef union {
|
|
1371
|
-
struct {
|
|
1372
|
-
int x;
|
|
1373
|
-
int y;
|
|
1374
|
-
int z;
|
|
1375
|
-
int w;
|
|
1376
|
-
};
|
|
1377
|
-
__m128i data;
|
|
1378
|
-
} int4;
|
|
1379
|
-
typedef union {
|
|
1380
|
-
struct {
|
|
1381
|
-
int x;
|
|
1382
|
-
int y;
|
|
1383
|
-
int z;
|
|
1384
|
-
};
|
|
1385
|
-
int data[3];
|
|
1386
|
-
} int3;
|
|
1387
|
-
typedef union {
|
|
1388
|
-
__m128i data[2];
|
|
1389
|
-
} int8;
|
|
1390
|
-
typedef union {
|
|
1391
|
-
__m128i data[4];
|
|
1392
|
-
} int16;
|
|
1393
|
-
|
|
1394
|
-
typedef union {
|
|
1395
|
-
struct {
|
|
1396
|
-
unsigned int x;
|
|
1397
|
-
};
|
|
1398
|
-
unsigned int data;
|
|
1399
|
-
} uint1;
|
|
1400
|
-
typedef union {
|
|
1401
|
-
struct {
|
|
1402
|
-
unsigned int x;
|
|
1403
|
-
unsigned int y;
|
|
1404
|
-
};
|
|
1405
|
-
__m64 data;
|
|
1406
|
-
} uint2;
|
|
1407
|
-
typedef union {
|
|
1408
|
-
struct {
|
|
1409
|
-
unsigned int x;
|
|
1410
|
-
unsigned int y;
|
|
1411
|
-
unsigned int z;
|
|
1412
|
-
unsigned int w;
|
|
1413
|
-
};
|
|
1414
|
-
__m128i data;
|
|
1415
|
-
} uint4;
|
|
1416
|
-
typedef union {
|
|
1417
|
-
struct {
|
|
1418
|
-
unsigned int x;
|
|
1419
|
-
unsigned int y;
|
|
1420
|
-
unsigned int z;
|
|
1421
|
-
};
|
|
1422
|
-
unsigned int data[3];
|
|
1423
|
-
} uint3;
|
|
1424
|
-
typedef union {
|
|
1425
|
-
__m128i data[2];
|
|
1426
|
-
} uint8;
|
|
1427
|
-
typedef union {
|
|
1428
|
-
__m128i data[4];
|
|
1429
|
-
} uint16;
|
|
1430
|
-
|
|
1431
|
-
typedef union {
|
|
1432
|
-
struct {
|
|
1433
|
-
int x;
|
|
1434
|
-
};
|
|
1435
|
-
int data;
|
|
1436
|
-
} long1;
|
|
1437
|
-
typedef union {
|
|
1438
|
-
struct {
|
|
1439
|
-
int x;
|
|
1440
|
-
int y;
|
|
1441
|
-
};
|
|
1442
|
-
__m64 data;
|
|
1443
|
-
} long2;
|
|
1444
|
-
typedef union {
|
|
1445
|
-
struct {
|
|
1446
|
-
int x;
|
|
1447
|
-
int y;
|
|
1448
|
-
int z;
|
|
1449
|
-
int w;
|
|
1450
|
-
};
|
|
1451
|
-
__m128i data;
|
|
1452
|
-
} long4;
|
|
1453
|
-
typedef union {
|
|
1454
|
-
struct {
|
|
1455
|
-
int x;
|
|
1456
|
-
int y;
|
|
1457
|
-
int z;
|
|
1458
|
-
};
|
|
1459
|
-
int data[3];
|
|
1460
|
-
} long3;
|
|
1461
|
-
typedef union {
|
|
1462
|
-
__m128i data[2];
|
|
1463
|
-
} long8;
|
|
1464
|
-
typedef union {
|
|
1465
|
-
__m128i data[4];
|
|
1466
|
-
} long16;
|
|
1467
|
-
|
|
1468
|
-
typedef union {
|
|
1469
|
-
struct {
|
|
1470
|
-
unsigned int x;
|
|
1471
|
-
};
|
|
1472
|
-
unsigned int data;
|
|
1473
|
-
} ulong1;
|
|
1474
|
-
typedef union {
|
|
1475
|
-
struct {
|
|
1476
|
-
unsigned int x;
|
|
1477
|
-
unsigned int y;
|
|
1478
|
-
};
|
|
1479
|
-
__m64 data;
|
|
1480
|
-
} ulong2;
|
|
1481
|
-
typedef union {
|
|
1482
|
-
struct {
|
|
1483
|
-
unsigned int x;
|
|
1484
|
-
unsigned int y;
|
|
1485
|
-
unsigned int z;
|
|
1486
|
-
unsigned int w;
|
|
1487
|
-
};
|
|
1488
|
-
__m128i data;
|
|
1489
|
-
} ulong4;
|
|
1490
|
-
typedef union {
|
|
1491
|
-
struct {
|
|
1492
|
-
unsigned int x;
|
|
1493
|
-
unsigned int y;
|
|
1494
|
-
unsigned int z;
|
|
1495
|
-
};
|
|
1496
|
-
unsigned int data[3];
|
|
1497
|
-
} ulong3;
|
|
1498
|
-
typedef union {
|
|
1499
|
-
__m128i data[2];
|
|
1500
|
-
} ulong8;
|
|
1501
|
-
typedef union {
|
|
1502
|
-
__m128i data[4];
|
|
1503
|
-
} ulong16;
|
|
1504
|
-
|
|
1505
|
-
typedef union {
|
|
1506
|
-
struct {
|
|
1507
|
-
long long x;
|
|
1508
|
-
};
|
|
1509
|
-
__m64 data;
|
|
1510
|
-
} longlong1;
|
|
1511
|
-
typedef union {
|
|
1512
|
-
struct {
|
|
1513
|
-
long long x;
|
|
1514
|
-
long long y;
|
|
1515
|
-
};
|
|
1516
|
-
__m128i data;
|
|
1517
|
-
} longlong2;
|
|
1518
|
-
typedef union {
|
|
1519
|
-
struct {
|
|
1520
|
-
long long x;
|
|
1521
|
-
long long y;
|
|
1522
|
-
long long z;
|
|
1523
|
-
long long w;
|
|
1524
|
-
};
|
|
1525
|
-
__m128i data[2];
|
|
1526
|
-
} longlong4;
|
|
1527
|
-
typedef union {
|
|
1528
|
-
struct {
|
|
1529
|
-
long long x;
|
|
1530
|
-
long long y;
|
|
1531
|
-
long long z;
|
|
1532
|
-
};
|
|
1533
|
-
__m64 data[3];
|
|
1534
|
-
} longlong3;
|
|
1535
|
-
typedef union {
|
|
1536
|
-
__m128i data[4];
|
|
1537
|
-
} longlong8;
|
|
1538
|
-
typedef union {
|
|
1539
|
-
__m128i data[8];
|
|
1540
|
-
} longlong16;
|
|
1541
|
-
|
|
1542
|
-
typedef union {
|
|
1543
|
-
struct {
|
|
1544
|
-
__m64 x;
|
|
1545
|
-
};
|
|
1546
|
-
__m64 data;
|
|
1547
|
-
} ulonglong1;
|
|
1548
|
-
typedef union {
|
|
1549
|
-
struct {
|
|
1550
|
-
__m64 x;
|
|
1551
|
-
__m64 y;
|
|
1552
|
-
};
|
|
1553
|
-
__m128i data;
|
|
1554
|
-
} ulonglong2;
|
|
1555
|
-
typedef union {
|
|
1556
|
-
struct {
|
|
1557
|
-
__m64 x;
|
|
1558
|
-
__m64 y;
|
|
1559
|
-
__m64 z;
|
|
1560
|
-
__m64 w;
|
|
1561
|
-
};
|
|
1562
|
-
__m128i data[2];
|
|
1563
|
-
} ulonglong4;
|
|
1564
|
-
typedef union {
|
|
1565
|
-
struct {
|
|
1566
|
-
__m64 x;
|
|
1567
|
-
__m64 y;
|
|
1568
|
-
__m64 z;
|
|
1569
|
-
};
|
|
1570
|
-
__m64 data[3];
|
|
1571
|
-
} ulonglong3;
|
|
1572
|
-
typedef union {
|
|
1573
|
-
__m128i data[4];
|
|
1574
|
-
} ulonglong8;
|
|
1575
|
-
typedef union {
|
|
1576
|
-
__m128i data[8];
|
|
1577
|
-
} ulonglong16;
|
|
1578
|
-
|
|
1579
|
-
typedef union {
|
|
1580
|
-
struct {
|
|
1581
|
-
float x;
|
|
1582
|
-
};
|
|
1583
|
-
float data;
|
|
1584
|
-
} float1;
|
|
1585
|
-
typedef union {
|
|
1586
|
-
struct {
|
|
1587
|
-
float x;
|
|
1588
|
-
float y;
|
|
1589
|
-
};
|
|
1590
|
-
__m64 data;
|
|
1591
|
-
} float2;
|
|
1592
|
-
typedef union {
|
|
1593
|
-
struct {
|
|
1594
|
-
float x;
|
|
1595
|
-
float y;
|
|
1596
|
-
float z;
|
|
1597
|
-
float w;
|
|
1598
|
-
};
|
|
1599
|
-
__m128 data;
|
|
1600
|
-
} float4;
|
|
1601
|
-
typedef union {
|
|
1602
|
-
struct {
|
|
1603
|
-
float x;
|
|
1604
|
-
float y;
|
|
1605
|
-
float z;
|
|
1606
|
-
};
|
|
1607
|
-
float data[3];
|
|
1608
|
-
} float3;
|
|
1609
|
-
typedef union {
|
|
1610
|
-
__m256 data;
|
|
1611
|
-
} float8;
|
|
1612
|
-
typedef union {
|
|
1613
|
-
__m256 data[2];
|
|
1614
|
-
} float16;
|
|
1615
|
-
|
|
1616
|
-
typedef union {
|
|
1617
|
-
struct {
|
|
1618
|
-
double x;
|
|
1619
|
-
};
|
|
1620
|
-
double data;
|
|
1621
|
-
} double1;
|
|
1622
|
-
typedef union {
|
|
1623
|
-
struct {
|
|
1624
|
-
double x;
|
|
1625
|
-
double y;
|
|
1626
|
-
};
|
|
1627
|
-
__m128d data;
|
|
1628
|
-
} double2;
|
|
1629
|
-
typedef union {
|
|
1630
|
-
struct {
|
|
1631
|
-
double x;
|
|
1632
|
-
double y;
|
|
1633
|
-
double z;
|
|
1634
|
-
double w;
|
|
1635
|
-
};
|
|
1636
|
-
__m256d data;
|
|
1637
|
-
} double4;
|
|
1638
|
-
typedef union {
|
|
1639
|
-
struct {
|
|
1640
|
-
double x;
|
|
1641
|
-
double y;
|
|
1642
|
-
double z;
|
|
1643
|
-
};
|
|
1644
|
-
double data[3];
|
|
1645
|
-
} double3;
|
|
1646
|
-
typedef union {
|
|
1647
|
-
__m256d data[2];
|
|
1648
|
-
} double8;
|
|
1649
|
-
typedef union {
|
|
1650
|
-
__m256d data[4];
|
|
1651
|
-
} double16;
|
|
1652
|
-
|
|
1653
|
-
#else // !defined(_MSC_VER)
|
|
1654
|
-
|
|
1655
|
-
/*
|
|
1656
|
-
this is for compatibility with CUDA as CUDA allows accessing vector components
|
|
1657
|
-
in C++ program with MSVC
|
|
1658
|
-
*/
|
|
1659
|
-
typedef union {
|
|
1660
|
-
struct {
|
|
1661
|
-
char x;
|
|
1662
|
-
};
|
|
1663
|
-
char data;
|
|
1664
|
-
} char1;
|
|
1665
|
-
typedef union {
|
|
1666
|
-
struct {
|
|
1667
|
-
char x;
|
|
1668
|
-
char y;
|
|
1669
|
-
};
|
|
1670
|
-
char data[2];
|
|
1671
|
-
} char2;
|
|
1672
|
-
typedef union {
|
|
1673
|
-
struct {
|
|
1674
|
-
char x;
|
|
1675
|
-
char y;
|
|
1676
|
-
char z;
|
|
1677
|
-
char w;
|
|
1678
|
-
};
|
|
1679
|
-
char data[4];
|
|
1680
|
-
} char4;
|
|
1681
|
-
typedef union {
|
|
1682
|
-
char data[8];
|
|
1683
|
-
} char8;
|
|
1684
|
-
typedef union {
|
|
1685
|
-
char data[16];
|
|
1686
|
-
} char16;
|
|
1687
|
-
typedef union {
|
|
1688
|
-
struct {
|
|
1689
|
-
char x;
|
|
1690
|
-
char y;
|
|
1691
|
-
char z;
|
|
1692
|
-
};
|
|
1693
|
-
char data[3];
|
|
1694
|
-
} char3;
|
|
1695
|
-
|
|
1696
|
-
typedef union {
|
|
1697
|
-
struct {
|
|
1698
|
-
unsigned char x;
|
|
1699
|
-
};
|
|
1700
|
-
unsigned char data;
|
|
1701
|
-
} uchar1;
|
|
1702
|
-
typedef union {
|
|
1703
|
-
struct {
|
|
1704
|
-
unsigned char x;
|
|
1705
|
-
unsigned char y;
|
|
1706
|
-
};
|
|
1707
|
-
unsigned char data[2];
|
|
1708
|
-
} uchar2;
|
|
1709
|
-
typedef union {
|
|
1710
|
-
struct {
|
|
1711
|
-
unsigned char x;
|
|
1712
|
-
unsigned char y;
|
|
1713
|
-
unsigned char z;
|
|
1714
|
-
unsigned char w;
|
|
1715
|
-
};
|
|
1716
|
-
unsigned char data[4];
|
|
1717
|
-
} uchar4;
|
|
1718
|
-
typedef union {
|
|
1719
|
-
unsigned char data[8];
|
|
1720
|
-
} uchar8;
|
|
1721
|
-
typedef union {
|
|
1722
|
-
unsigned char data[16];
|
|
1723
|
-
} uchar16;
|
|
1724
|
-
typedef union {
|
|
1725
|
-
struct {
|
|
1726
|
-
unsigned char x;
|
|
1727
|
-
unsigned char y;
|
|
1728
|
-
unsigned char z;
|
|
1729
|
-
};
|
|
1730
|
-
unsigned char data[3];
|
|
1731
|
-
} uchar3;
|
|
1732
|
-
|
|
1733
|
-
typedef union {
|
|
1734
|
-
struct {
|
|
1735
|
-
short x;
|
|
1736
|
-
};
|
|
1737
|
-
short data;
|
|
1738
|
-
} short1;
|
|
1739
|
-
typedef union {
|
|
1740
|
-
struct {
|
|
1741
|
-
short x;
|
|
1742
|
-
short y;
|
|
1743
|
-
};
|
|
1744
|
-
short data[2];
|
|
1745
|
-
} short2;
|
|
1746
|
-
typedef union {
|
|
1747
|
-
struct {
|
|
1748
|
-
short x;
|
|
1749
|
-
short y;
|
|
1750
|
-
short z;
|
|
1751
|
-
short w;
|
|
1752
|
-
};
|
|
1753
|
-
short data[4];
|
|
1754
|
-
} short4;
|
|
1755
|
-
typedef union {
|
|
1756
|
-
short data[8];
|
|
1757
|
-
} short8;
|
|
1758
|
-
typedef union {
|
|
1759
|
-
short data[16];
|
|
1760
|
-
} short16;
|
|
1761
|
-
typedef union {
|
|
1762
|
-
struct {
|
|
1763
|
-
short x;
|
|
1764
|
-
short y;
|
|
1765
|
-
short z;
|
|
1766
|
-
};
|
|
1767
|
-
short data[3];
|
|
1768
|
-
} short3;
|
|
1769
|
-
|
|
1770
|
-
typedef union {
|
|
1771
|
-
struct {
|
|
1772
|
-
unsigned short x;
|
|
1773
|
-
};
|
|
1774
|
-
unsigned short data;
|
|
1775
|
-
} ushort1;
|
|
1776
|
-
typedef union {
|
|
1777
|
-
struct {
|
|
1778
|
-
unsigned short x;
|
|
1779
|
-
unsigned short y;
|
|
1780
|
-
};
|
|
1781
|
-
unsigned short data[2];
|
|
1782
|
-
} ushort2;
|
|
1783
|
-
typedef union {
|
|
1784
|
-
struct {
|
|
1785
|
-
unsigned short x;
|
|
1786
|
-
unsigned short y;
|
|
1787
|
-
unsigned short z;
|
|
1788
|
-
unsigned short w;
|
|
1789
|
-
};
|
|
1790
|
-
unsigned short data[4];
|
|
1791
|
-
} ushort4;
|
|
1792
|
-
typedef union {
|
|
1793
|
-
unsigned short data[8];
|
|
1794
|
-
} ushort8;
|
|
1795
|
-
typedef union {
|
|
1796
|
-
unsigned short data[16];
|
|
1797
|
-
} ushort16;
|
|
1798
|
-
typedef union {
|
|
1799
|
-
struct {
|
|
1800
|
-
unsigned short x;
|
|
1801
|
-
unsigned short y;
|
|
1802
|
-
unsigned short z;
|
|
1803
|
-
};
|
|
1804
|
-
unsigned short data[3];
|
|
1805
|
-
} ushort3;
|
|
1806
|
-
|
|
1807
|
-
typedef union {
|
|
1808
|
-
struct {
|
|
1809
|
-
int x;
|
|
1810
|
-
};
|
|
1811
|
-
int data;
|
|
1812
|
-
} int1;
|
|
1813
|
-
typedef union {
|
|
1814
|
-
struct {
|
|
1815
|
-
int x;
|
|
1816
|
-
int y;
|
|
1817
|
-
};
|
|
1818
|
-
int data[2];
|
|
1819
|
-
} int2;
|
|
1820
|
-
typedef union {
|
|
1821
|
-
struct {
|
|
1822
|
-
int x;
|
|
1823
|
-
int y;
|
|
1824
|
-
int z;
|
|
1825
|
-
int w;
|
|
1826
|
-
};
|
|
1827
|
-
int data[4];
|
|
1828
|
-
} int4;
|
|
1829
|
-
typedef union {
|
|
1830
|
-
int data[8];
|
|
1831
|
-
} int8;
|
|
1832
|
-
typedef union {
|
|
1833
|
-
int data[16];
|
|
1834
|
-
} int16;
|
|
1835
|
-
typedef union {
|
|
1836
|
-
struct {
|
|
1837
|
-
int x;
|
|
1838
|
-
int y;
|
|
1839
|
-
int z;
|
|
1840
|
-
};
|
|
1841
|
-
int data[3];
|
|
1842
|
-
} int3;
|
|
1843
|
-
|
|
1844
|
-
typedef union {
|
|
1845
|
-
struct {
|
|
1846
|
-
unsigned int x;
|
|
1847
|
-
};
|
|
1848
|
-
unsigned int data;
|
|
1849
|
-
} uint1;
|
|
1850
|
-
typedef union {
|
|
1851
|
-
struct {
|
|
1852
|
-
unsigned int x;
|
|
1853
|
-
unsigned int y;
|
|
1854
|
-
};
|
|
1855
|
-
unsigned int data[2];
|
|
1856
|
-
} uint2;
|
|
1857
|
-
typedef union {
|
|
1858
|
-
struct {
|
|
1859
|
-
unsigned int x;
|
|
1860
|
-
unsigned int y;
|
|
1861
|
-
unsigned int z;
|
|
1862
|
-
unsigned int w;
|
|
1863
|
-
};
|
|
1864
|
-
unsigned int data[4];
|
|
1865
|
-
} uint4;
|
|
1866
|
-
typedef union {
|
|
1867
|
-
unsigned int data[8];
|
|
1868
|
-
} uint8;
|
|
1869
|
-
typedef union {
|
|
1870
|
-
unsigned int data[16];
|
|
1871
|
-
} uint16;
|
|
1872
|
-
typedef union {
|
|
1873
|
-
struct {
|
|
1874
|
-
unsigned int x;
|
|
1875
|
-
unsigned int y;
|
|
1876
|
-
unsigned int z;
|
|
1877
|
-
};
|
|
1878
|
-
unsigned int data[3];
|
|
1879
|
-
} uint3;
|
|
1880
|
-
|
|
1881
|
-
typedef union {
|
|
1882
|
-
struct {
|
|
1883
|
-
long x;
|
|
1884
|
-
};
|
|
1885
|
-
long data;
|
|
1886
|
-
} long1;
|
|
1887
|
-
typedef union {
|
|
1888
|
-
struct {
|
|
1889
|
-
long x;
|
|
1890
|
-
long y;
|
|
1891
|
-
};
|
|
1892
|
-
long data[2];
|
|
1893
|
-
} long2;
|
|
1894
|
-
typedef union {
|
|
1895
|
-
struct {
|
|
1896
|
-
long x;
|
|
1897
|
-
long y;
|
|
1898
|
-
long z;
|
|
1899
|
-
long w;
|
|
1900
|
-
};
|
|
1901
|
-
long data[4];
|
|
1902
|
-
} long4;
|
|
1903
|
-
typedef union {
|
|
1904
|
-
long data[8];
|
|
1905
|
-
} long8;
|
|
1906
|
-
typedef union {
|
|
1907
|
-
long data[16];
|
|
1908
|
-
} long16;
|
|
1909
|
-
typedef union {
|
|
1910
|
-
struct {
|
|
1911
|
-
long x;
|
|
1912
|
-
long y;
|
|
1913
|
-
long z;
|
|
1914
|
-
};
|
|
1915
|
-
long data[3];
|
|
1916
|
-
} long3;
|
|
1917
|
-
|
|
1918
|
-
typedef union {
|
|
1919
|
-
struct {
|
|
1920
|
-
unsigned long x;
|
|
1921
|
-
};
|
|
1922
|
-
unsigned long data;
|
|
1923
|
-
} ulong1;
|
|
1924
|
-
typedef union {
|
|
1925
|
-
struct {
|
|
1926
|
-
unsigned long x;
|
|
1927
|
-
unsigned long y;
|
|
1928
|
-
};
|
|
1929
|
-
unsigned long data[2];
|
|
1930
|
-
} ulong2;
|
|
1931
|
-
typedef union {
|
|
1932
|
-
struct {
|
|
1933
|
-
unsigned long x;
|
|
1934
|
-
unsigned long y;
|
|
1935
|
-
unsigned long z;
|
|
1936
|
-
unsigned long w;
|
|
1937
|
-
};
|
|
1938
|
-
unsigned long data[4];
|
|
1939
|
-
} ulong4;
|
|
1940
|
-
typedef union {
|
|
1941
|
-
unsigned long data[8];
|
|
1942
|
-
} ulong8;
|
|
1943
|
-
typedef union {
|
|
1944
|
-
unsigned long data[16];
|
|
1945
|
-
} ulong16;
|
|
1946
|
-
typedef union {
|
|
1947
|
-
struct {
|
|
1948
|
-
unsigned long x;
|
|
1949
|
-
unsigned long y;
|
|
1950
|
-
unsigned long z;
|
|
1951
|
-
};
|
|
1952
|
-
unsigned long data[3];
|
|
1953
|
-
} ulong3;
|
|
1954
|
-
|
|
1955
|
-
typedef union {
|
|
1956
|
-
struct {
|
|
1957
|
-
long long x;
|
|
1958
|
-
};
|
|
1959
|
-
long long data;
|
|
1960
|
-
} longlong1;
|
|
1961
|
-
typedef union {
|
|
1962
|
-
struct {
|
|
1963
|
-
long long x;
|
|
1964
|
-
long long y;
|
|
1965
|
-
};
|
|
1966
|
-
long long data[2];
|
|
1967
|
-
} longlong2;
|
|
1968
|
-
typedef union {
|
|
1969
|
-
struct {
|
|
1970
|
-
long long x;
|
|
1971
|
-
long long y;
|
|
1972
|
-
long long z;
|
|
1973
|
-
long long w;
|
|
1974
|
-
};
|
|
1975
|
-
long long data[4];
|
|
1976
|
-
} longlong4;
|
|
1977
|
-
typedef union {
|
|
1978
|
-
long long data[8];
|
|
1979
|
-
} longlong8;
|
|
1980
|
-
typedef union {
|
|
1981
|
-
long long data[16];
|
|
1982
|
-
} longlong16;
|
|
1983
|
-
typedef union {
|
|
1984
|
-
struct {
|
|
1985
|
-
long long x;
|
|
1986
|
-
long long y;
|
|
1987
|
-
long long z;
|
|
1988
|
-
};
|
|
1989
|
-
long long data[3];
|
|
1990
|
-
} longlong3;
|
|
1991
|
-
|
|
1992
|
-
typedef union {
|
|
1993
|
-
struct {
|
|
1994
|
-
unsigned long long x;
|
|
1995
|
-
};
|
|
1996
|
-
unsigned long long data;
|
|
1997
|
-
} ulonglong1;
|
|
1998
|
-
typedef union {
|
|
1999
|
-
struct {
|
|
2000
|
-
unsigned long long x;
|
|
2001
|
-
unsigned long long y;
|
|
2002
|
-
};
|
|
2003
|
-
unsigned long long data[2];
|
|
2004
|
-
} ulonglong2;
|
|
2005
|
-
typedef union {
|
|
2006
|
-
struct {
|
|
2007
|
-
unsigned long long x;
|
|
2008
|
-
unsigned long long y;
|
|
2009
|
-
unsigned long long z;
|
|
2010
|
-
unsigned long long w;
|
|
2011
|
-
};
|
|
2012
|
-
unsigned long long data[4];
|
|
2013
|
-
} ulonglong4;
|
|
2014
|
-
typedef union {
|
|
2015
|
-
unsigned long long data[8];
|
|
2016
|
-
} ulonglong8;
|
|
2017
|
-
typedef union {
|
|
2018
|
-
unsigned long long data[16];
|
|
2019
|
-
} ulonglong16;
|
|
2020
|
-
typedef union {
|
|
2021
|
-
struct {
|
|
2022
|
-
unsigned long long x;
|
|
2023
|
-
unsigned long long y;
|
|
2024
|
-
unsigned long long z;
|
|
2025
|
-
};
|
|
2026
|
-
unsigned long long data[3];
|
|
2027
|
-
} ulonglong3;
|
|
2028
|
-
|
|
2029
|
-
typedef union {
|
|
2030
|
-
struct {
|
|
2031
|
-
float x;
|
|
2032
|
-
};
|
|
2033
|
-
float data;
|
|
2034
|
-
} float1;
|
|
2035
|
-
typedef union {
|
|
2036
|
-
struct {
|
|
2037
|
-
float x;
|
|
2038
|
-
float y;
|
|
2039
|
-
};
|
|
2040
|
-
float data[2];
|
|
2041
|
-
} float2;
|
|
2042
|
-
typedef union {
|
|
2043
|
-
struct {
|
|
2044
|
-
float x;
|
|
2045
|
-
float y;
|
|
2046
|
-
float z;
|
|
2047
|
-
float w;
|
|
2048
|
-
};
|
|
2049
|
-
float data[4];
|
|
2050
|
-
} float4;
|
|
2051
|
-
typedef union {
|
|
2052
|
-
float data[8];
|
|
2053
|
-
} float8;
|
|
2054
|
-
typedef union {
|
|
2055
|
-
float data[16];
|
|
2056
|
-
} float16;
|
|
2057
|
-
typedef union {
|
|
2058
|
-
struct {
|
|
2059
|
-
float x;
|
|
2060
|
-
float y;
|
|
2061
|
-
float z;
|
|
2062
|
-
};
|
|
2063
|
-
float data[3];
|
|
2064
|
-
} float3;
|
|
2065
|
-
|
|
2066
|
-
typedef union {
|
|
2067
|
-
struct {
|
|
2068
|
-
double x;
|
|
2069
|
-
};
|
|
2070
|
-
double data;
|
|
2071
|
-
} double1;
|
|
2072
|
-
typedef union {
|
|
2073
|
-
struct {
|
|
2074
|
-
double x;
|
|
2075
|
-
double y;
|
|
2076
|
-
};
|
|
2077
|
-
double data[2];
|
|
2078
|
-
} double2;
|
|
2079
|
-
typedef union {
|
|
2080
|
-
struct {
|
|
2081
|
-
double x;
|
|
2082
|
-
double y;
|
|
2083
|
-
double z;
|
|
2084
|
-
double w;
|
|
2085
|
-
};
|
|
2086
|
-
double data[4];
|
|
2087
|
-
} double4;
|
|
2088
|
-
typedef union {
|
|
2089
|
-
double data[8];
|
|
2090
|
-
} double8;
|
|
2091
|
-
typedef union {
|
|
2092
|
-
double data[16];
|
|
2093
|
-
} double16;
|
|
2094
|
-
typedef union {
|
|
2095
|
-
struct {
|
|
2096
|
-
double x;
|
|
2097
|
-
double y;
|
|
2098
|
-
double z;
|
|
2099
|
-
};
|
|
2100
|
-
double data[3];
|
|
2101
|
-
} double3;
|
|
2102
|
-
|
|
2103
|
-
#endif // defined(_MSC_VER)
|
|
2104
|
-
#endif // defined(__has_attribute)
|
|
2105
|
-
|
|
2106
|
-
#ifdef __cplusplus
|
|
2107
|
-
#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
|
|
2108
|
-
static inline __HOST_DEVICE__ type make_##type(comp x) { \
|
|
2109
|
-
type r{x}; \
|
|
2110
|
-
return r; \
|
|
2111
|
-
}
|
|
2112
|
-
|
|
2113
|
-
#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
|
|
2114
|
-
static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
|
|
2115
|
-
type r{x, y}; \
|
|
2116
|
-
return r; \
|
|
2117
|
-
}
|
|
2118
|
-
|
|
2119
|
-
#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
|
|
2120
|
-
static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
|
|
2121
|
-
type r{x, y, z}; \
|
|
2122
|
-
return r; \
|
|
2123
|
-
}
|
|
2124
|
-
|
|
2125
|
-
#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
|
|
2126
|
-
static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
|
|
2127
|
-
type r{x, y, z, w}; \
|
|
2128
|
-
return r; \
|
|
2129
|
-
}
|
|
2130
|
-
#else
|
|
2131
|
-
#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
|
|
2132
|
-
static inline __HOST_DEVICE__ type make_##type(comp x) { \
|
|
2133
|
-
type r; \
|
|
2134
|
-
r.x = x; \
|
|
2135
|
-
return r; \
|
|
2136
|
-
}
|
|
2137
|
-
|
|
2138
|
-
#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
|
|
2139
|
-
static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
|
|
2140
|
-
type r; \
|
|
2141
|
-
r.x = x; \
|
|
2142
|
-
r.y = y; \
|
|
2143
|
-
return r; \
|
|
2144
|
-
}
|
|
2145
|
-
|
|
2146
|
-
#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
|
|
2147
|
-
static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
|
|
2148
|
-
type r; \
|
|
2149
|
-
r.x = x; \
|
|
2150
|
-
r.y = y; \
|
|
2151
|
-
r.z = z; \
|
|
2152
|
-
return r; \
|
|
2153
|
-
}
|
|
2154
|
-
|
|
2155
|
-
#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
|
|
2156
|
-
static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
|
|
2157
|
-
type r; \
|
|
2158
|
-
r.x = x; \
|
|
2159
|
-
r.y = y; \
|
|
2160
|
-
r.z = z; \
|
|
2161
|
-
r.w = w; \
|
|
2162
|
-
return r; \
|
|
2163
|
-
}
|
|
2164
|
-
#endif
|
|
2165
|
-
|
|
2166
|
-
DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
|
|
2167
|
-
DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
|
|
2168
|
-
DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
|
|
2169
|
-
DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
|
|
2170
|
-
|
|
2171
|
-
DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
|
|
2172
|
-
DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
|
|
2173
|
-
DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
|
|
2174
|
-
DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
|
|
2175
|
-
|
|
2176
|
-
DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
|
|
2177
|
-
DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
|
|
2178
|
-
DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
|
|
2179
|
-
DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
|
|
2180
|
-
|
|
2181
|
-
DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
|
|
2182
|
-
DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
|
|
2183
|
-
DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
|
|
2184
|
-
DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
|
|
2185
|
-
|
|
2186
|
-
DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
|
|
2187
|
-
DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
|
|
2188
|
-
DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
|
|
2189
|
-
DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
|
|
2190
|
-
|
|
2191
|
-
DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
|
|
2192
|
-
DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
|
|
2193
|
-
DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
|
|
2194
|
-
DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
|
|
2195
|
-
|
|
2196
|
-
DECLOP_MAKE_ONE_COMPONENT(float, float1);
|
|
2197
|
-
DECLOP_MAKE_TWO_COMPONENT(float, float2);
|
|
2198
|
-
DECLOP_MAKE_THREE_COMPONENT(float, float3);
|
|
2199
|
-
DECLOP_MAKE_FOUR_COMPONENT(float, float4);
|
|
2200
|
-
|
|
2201
|
-
DECLOP_MAKE_ONE_COMPONENT(double, double1);
|
|
2202
|
-
DECLOP_MAKE_TWO_COMPONENT(double, double2);
|
|
2203
|
-
DECLOP_MAKE_THREE_COMPONENT(double, double3);
|
|
2204
|
-
DECLOP_MAKE_FOUR_COMPONENT(double, double4);
|
|
2205
|
-
|
|
2206
|
-
DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
|
|
2207
|
-
DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
|
|
2208
|
-
DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
|
|
2209
|
-
DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
|
|
2210
|
-
|
|
2211
|
-
DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
|
|
2212
|
-
DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
|
|
2213
|
-
DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
|
|
2214
|
-
DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
|
|
2215
|
-
|
|
2216
|
-
DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
|
|
2217
|
-
DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
|
|
2218
|
-
DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
|
|
2219
|
-
DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
|
|
2220
|
-
|
|
2221
|
-
DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
|
|
2222
|
-
DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
|
|
2223
|
-
DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
|
|
2224
|
-
DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
|
|
2225
|
-
|
|
2226
|
-
#endif
|