triton-windows 3.3.1.post19__cp312-cp312-win_amd64.whl → 3.4.0.post20__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (166) hide show
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +4 -1
  3. triton/_filecheck.py +87 -0
  4. triton/_internal_testing.py +26 -15
  5. triton/_utils.py +110 -21
  6. triton/backends/__init__.py +20 -23
  7. triton/backends/amd/__init__.py +0 -0
  8. triton/backends/amd/compiler.py +112 -78
  9. triton/backends/amd/driver.c +5 -2
  10. triton/backends/amd/driver.py +149 -47
  11. triton/backends/compiler.py +7 -21
  12. triton/backends/nvidia/bin/ptxas.exe +0 -0
  13. triton/backends/nvidia/compiler.py +92 -93
  14. triton/backends/nvidia/driver.c +90 -98
  15. triton/backends/nvidia/driver.py +303 -128
  16. triton/compiler/code_generator.py +212 -111
  17. triton/compiler/compiler.py +110 -25
  18. triton/experimental/__init__.py +0 -0
  19. triton/experimental/gluon/__init__.py +4 -0
  20. triton/experimental/gluon/_compiler.py +0 -0
  21. triton/experimental/gluon/_runtime.py +99 -0
  22. triton/experimental/gluon/language/__init__.py +18 -0
  23. triton/experimental/gluon/language/_core.py +312 -0
  24. triton/experimental/gluon/language/_layouts.py +230 -0
  25. triton/experimental/gluon/language/_math.py +12 -0
  26. triton/experimental/gluon/language/_semantic.py +287 -0
  27. triton/experimental/gluon/language/_standard.py +47 -0
  28. triton/experimental/gluon/language/nvidia/__init__.py +4 -0
  29. triton/experimental/gluon/language/nvidia/blackwell/__init__.py +202 -0
  30. triton/experimental/gluon/language/nvidia/blackwell/tma.py +32 -0
  31. triton/experimental/gluon/language/nvidia/hopper/__init__.py +11 -0
  32. triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +51 -0
  33. triton/experimental/gluon/language/nvidia/hopper/tma.py +96 -0
  34. triton/experimental/gluon/nvidia/__init__.py +4 -0
  35. triton/experimental/gluon/nvidia/blackwell.py +3 -0
  36. triton/experimental/gluon/nvidia/hopper.py +40 -0
  37. triton/knobs.py +481 -0
  38. triton/language/__init__.py +39 -14
  39. triton/language/core.py +794 -537
  40. triton/language/extra/cuda/__init__.py +10 -7
  41. triton/language/extra/cuda/gdc.py +42 -0
  42. triton/language/extra/cuda/libdevice.py +394 -394
  43. triton/language/extra/cuda/utils.py +21 -21
  44. triton/language/extra/hip/libdevice.py +113 -104
  45. triton/language/math.py +65 -66
  46. triton/language/random.py +12 -2
  47. triton/language/semantic.py +1706 -1770
  48. triton/language/standard.py +116 -51
  49. triton/runtime/autotuner.py +117 -59
  50. triton/runtime/build.py +76 -12
  51. triton/runtime/cache.py +18 -47
  52. triton/runtime/driver.py +32 -29
  53. triton/runtime/interpreter.py +72 -35
  54. triton/runtime/jit.py +146 -110
  55. triton/testing.py +16 -12
  56. triton/tools/disasm.py +3 -4
  57. triton/tools/tensor_descriptor.py +36 -0
  58. triton/windows_utils.py +14 -6
  59. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/METADATA +7 -2
  60. triton_windows-3.4.0.post20.dist-info/RECORD +186 -0
  61. triton_windows-3.4.0.post20.dist-info/entry_points.txt +3 -0
  62. triton_windows-3.4.0.post20.dist-info/licenses/LICENSE +23 -0
  63. triton_windows-3.4.0.post20.dist-info/top_level.txt +1 -0
  64. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  65. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
  66. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
  67. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
  68. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  69. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  70. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  71. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
  72. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  73. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
  74. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  75. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  76. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  77. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  78. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  79. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  80. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  81. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  82. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
  83. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
  84. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  85. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  86. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  87. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  88. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  89. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  90. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  91. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
  92. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  93. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  94. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  95. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  96. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  97. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
  98. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
  99. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
  100. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  101. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  102. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  103. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  104. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  105. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  106. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  107. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  108. triton/backends/amd/include/hip/device_functions.h +0 -38
  109. triton/backends/amd/include/hip/driver_types.h +0 -468
  110. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  111. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  112. triton/backends/amd/include/hip/hip_common.h +0 -100
  113. triton/backends/amd/include/hip/hip_complex.h +0 -38
  114. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  115. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  116. triton/backends/amd/include/hip/hip_ext.h +0 -161
  117. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  118. triton/backends/amd/include/hip/hip_fp8.h +0 -33
  119. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  120. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  121. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  122. triton/backends/amd/include/hip/hip_profile.h +0 -27
  123. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  124. triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
  125. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  126. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  127. triton/backends/amd/include/hip/hip_version.h +0 -17
  128. triton/backends/amd/include/hip/hiprtc.h +0 -421
  129. triton/backends/amd/include/hip/library_types.h +0 -78
  130. triton/backends/amd/include/hip/math_functions.h +0 -42
  131. triton/backends/amd/include/hip/surface_types.h +0 -63
  132. triton/backends/amd/include/hip/texture_types.h +0 -194
  133. triton/backends/amd/include/hsa/Brig.h +0 -1131
  134. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  135. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
  136. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  137. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  138. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  139. triton/backends/amd/include/hsa/hsa.h +0 -5738
  140. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  141. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
  142. triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
  143. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
  144. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  145. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  146. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  147. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  148. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
  149. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  150. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
  151. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
  152. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
  153. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  154. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  155. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  156. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  157. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  158. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  159. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  160. triton/backends/amd/include/roctracer/roctx.h +0 -229
  161. triton/language/_utils.py +0 -21
  162. triton/language/extra/cuda/_experimental_tma.py +0 -106
  163. triton/tools/experimental_descriptor.py +0 -32
  164. triton_windows-3.3.1.post19.dist-info/RECORD +0 -260
  165. triton_windows-3.3.1.post19.dist-info/top_level.txt +0 -14
  166. {triton_windows-3.3.1.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/WHEEL +0 -0
@@ -1,293 +0,0 @@
1
- /**
2
- * MIT License
3
- *
4
- * Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved.
5
- *
6
- * Permission is hereby granted, free of charge, to any person obtaining a copy
7
- * of this software and associated documentation files (the "Software"), to deal
8
- * in the Software without restriction, including without limitation the rights
9
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
- * copies of the Software, and to permit persons to whom the Software is
11
- * furnished to do so, subject to the following conditions:
12
- *
13
- * The above copyright notice and this permission notice shall be included in
14
- * all copies or substantial portions of the Software.
15
- *
16
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
- * SOFTWARE.
23
- */
24
-
25
- /*!\file
26
- * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef
27
- */
28
-
29
- #ifndef _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BFLOAT16_H_
30
- #define _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BFLOAT16_H_
31
-
32
- #include "host_defines.h"
33
- #if defined(__HIPCC_RTC__)
34
- #define __HOST_DEVICE__ __device__
35
- #else
36
- #define __HOST_DEVICE__ __host__ __device__
37
- #endif
38
-
39
- #if __cplusplus < 201103L || !defined(__HIPCC__)
40
-
41
- // If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only
42
- // include a minimal definition of hip_bfloat16
43
-
44
- #include <stdint.h>
45
- /*! \brief Struct to represent a 16 bit brain floating point number. */
46
- typedef struct
47
- {
48
- uint16_t data;
49
- } hip_bfloat16;
50
-
51
- #else // __cplusplus < 201103L || !defined(__HIPCC__)
52
-
53
- #include <hip/hip_runtime.h>
54
-
55
- #pragma clang diagnostic push
56
- #pragma clang diagnostic ignored "-Wshadow"
57
- struct hip_bfloat16
58
- {
59
- __hip_uint16_t data;
60
-
61
- enum truncate_t
62
- {
63
- truncate
64
- };
65
-
66
- __HOST_DEVICE__ hip_bfloat16() = default;
67
-
68
- // round upper 16 bits of IEEE float to convert to bfloat16
69
- explicit __HOST_DEVICE__ hip_bfloat16(float f)
70
- : data(float_to_bfloat16(f))
71
- {
72
- }
73
-
74
- explicit __HOST_DEVICE__ hip_bfloat16(float f, truncate_t)
75
- : data(truncate_float_to_bfloat16(f))
76
- {
77
- }
78
-
79
- // zero extend lower 16 bits of bfloat16 to convert to IEEE float
80
- __HOST_DEVICE__ operator float() const
81
- {
82
- union
83
- {
84
- uint32_t int32;
85
- float fp32;
86
- } u = {uint32_t(data) << 16};
87
- return u.fp32;
88
- }
89
-
90
- __HOST_DEVICE__ hip_bfloat16 &operator=(const float& f)
91
- {
92
- data = float_to_bfloat16(f);
93
- return *this;
94
- }
95
-
96
- static __HOST_DEVICE__ hip_bfloat16 round_to_bfloat16(float f)
97
- {
98
- hip_bfloat16 output;
99
- output.data = float_to_bfloat16(f);
100
- return output;
101
- }
102
-
103
- static __HOST_DEVICE__ hip_bfloat16 round_to_bfloat16(float f, truncate_t)
104
- {
105
- hip_bfloat16 output;
106
- output.data = truncate_float_to_bfloat16(f);
107
- return output;
108
- }
109
-
110
- private:
111
- static __HOST_DEVICE__ __hip_uint16_t float_to_bfloat16(float f)
112
- {
113
- union
114
- {
115
- float fp32;
116
- uint32_t int32;
117
- } u = {f};
118
- if(~u.int32 & 0x7f800000)
119
- {
120
- // When the exponent bits are not all 1s, then the value is zero, normal,
121
- // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
122
- // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
123
- // This causes the bfloat16's mantissa to be incremented by 1 if the 16
124
- // least significant bits of the float mantissa are greater than 0x8000,
125
- // or if they are equal to 0x8000 and the least significant bit of the
126
- // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
127
- // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
128
- // has the value 0x7f, then incrementing it causes it to become 0x00 and
129
- // the exponent is incremented by one, which is the next higher FP value
130
- // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
131
- // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
132
- // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
133
- // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
134
- // incrementing it causes it to become an exponent of 0xFF and a mantissa
135
- // of 0x00, which is Inf, the next higher value to the unrounded value.
136
- u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even
137
- }
138
- else if(u.int32 & 0xffff)
139
- {
140
- // When all of the exponent bits are 1, the value is Inf or NaN.
141
- // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
142
- // mantissa bit. Quiet NaN is indicated by the most significant mantissa
143
- // bit being 1. Signaling NaN is indicated by the most significant
144
- // mantissa bit being 0 but some other bit(s) being 1. If any of the
145
- // lower 16 bits of the mantissa are 1, we set the least significant bit
146
- // of the bfloat16 mantissa, in order to preserve signaling NaN in case
147
- // the bloat16's mantissa bits are all 0.
148
- u.int32 |= 0x10000; // Preserve signaling NaN
149
- }
150
- return __hip_uint16_t(u.int32 >> 16);
151
- }
152
-
153
- // Truncate instead of rounding, preserving SNaN
154
- static __HOST_DEVICE__ __hip_uint16_t truncate_float_to_bfloat16(float f)
155
- {
156
- union
157
- {
158
- float fp32;
159
- uint32_t int32;
160
- } u = {f};
161
- return __hip_uint16_t(u.int32 >> 16) | (!(~u.int32 & 0x7f800000) && (u.int32 & 0xffff));
162
- }
163
- };
164
- #pragma clang diagnostic pop
165
-
166
- typedef struct
167
- {
168
- __hip_uint16_t data;
169
- } hip_bfloat16_public;
170
-
171
- static_assert(__hip_internal::is_standard_layout<hip_bfloat16>{},
172
- "hip_bfloat16 is not a standard layout type, and thus is "
173
- "incompatible with C.");
174
-
175
- static_assert(__hip_internal::is_trivial<hip_bfloat16>{},
176
- "hip_bfloat16 is not a trivial type, and thus is "
177
- "incompatible with C.");
178
- #if !defined(__HIPCC_RTC__)
179
- static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public)
180
- && offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data),
181
- "internal hip_bfloat16 does not match public hip_bfloat16");
182
-
183
- inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16)
184
- {
185
- return os << float(bf16);
186
- }
187
- #endif
188
-
189
- inline __HOST_DEVICE__ hip_bfloat16 operator+(hip_bfloat16 a)
190
- {
191
- return a;
192
- }
193
- inline __HOST_DEVICE__ hip_bfloat16 operator-(hip_bfloat16 a)
194
- {
195
- a.data ^= 0x8000;
196
- return a;
197
- }
198
- inline __HOST_DEVICE__ hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b)
199
- {
200
- return hip_bfloat16(float(a) + float(b));
201
- }
202
- inline __HOST_DEVICE__ hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b)
203
- {
204
- return hip_bfloat16(float(a) - float(b));
205
- }
206
- inline __HOST_DEVICE__ hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b)
207
- {
208
- return hip_bfloat16(float(a) * float(b));
209
- }
210
- inline __HOST_DEVICE__ hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b)
211
- {
212
- return hip_bfloat16(float(a) / float(b));
213
- }
214
- inline __HOST_DEVICE__ bool operator<(hip_bfloat16 a, hip_bfloat16 b)
215
- {
216
- return float(a) < float(b);
217
- }
218
- inline __HOST_DEVICE__ bool operator==(hip_bfloat16 a, hip_bfloat16 b)
219
- {
220
- return float(a) == float(b);
221
- }
222
- inline __HOST_DEVICE__ bool operator>(hip_bfloat16 a, hip_bfloat16 b)
223
- {
224
- return b < a;
225
- }
226
- inline __HOST_DEVICE__ bool operator<=(hip_bfloat16 a, hip_bfloat16 b)
227
- {
228
- return !(a > b);
229
- }
230
- inline __HOST_DEVICE__ bool operator!=(hip_bfloat16 a, hip_bfloat16 b)
231
- {
232
- return !(a == b);
233
- }
234
- inline __HOST_DEVICE__ bool operator>=(hip_bfloat16 a, hip_bfloat16 b)
235
- {
236
- return !(a < b);
237
- }
238
- inline __HOST_DEVICE__ hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b)
239
- {
240
- return a = a + b;
241
- }
242
- inline __HOST_DEVICE__ hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b)
243
- {
244
- return a = a - b;
245
- }
246
- inline __HOST_DEVICE__ hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b)
247
- {
248
- return a = a * b;
249
- }
250
- inline __HOST_DEVICE__ hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b)
251
- {
252
- return a = a / b;
253
- }
254
- inline __HOST_DEVICE__ hip_bfloat16& operator++(hip_bfloat16& a)
255
- {
256
- return a += hip_bfloat16(1.0f);
257
- }
258
- inline __HOST_DEVICE__ hip_bfloat16& operator--(hip_bfloat16& a)
259
- {
260
- return a -= hip_bfloat16(1.0f);
261
- }
262
- inline __HOST_DEVICE__ hip_bfloat16 operator++(hip_bfloat16& a, int)
263
- {
264
- hip_bfloat16 orig = a;
265
- ++a;
266
- return orig;
267
- }
268
- inline __HOST_DEVICE__ hip_bfloat16 operator--(hip_bfloat16& a, int)
269
- {
270
- hip_bfloat16 orig = a;
271
- --a;
272
- return orig;
273
- }
274
-
275
- namespace std
276
- {
277
- constexpr __HOST_DEVICE__ bool isinf(hip_bfloat16 a)
278
- {
279
- return !(~a.data & 0x7f80) && !(a.data & 0x7f);
280
- }
281
- constexpr __HOST_DEVICE__ bool isnan(hip_bfloat16 a)
282
- {
283
- return !(~a.data & 0x7f80) && +(a.data & 0x7f);
284
- }
285
- constexpr __HOST_DEVICE__ bool iszero(hip_bfloat16 a)
286
- {
287
- return !(a.data & 0x7fff);
288
- }
289
- }
290
-
291
- #endif // __cplusplus < 201103L || !defined(__HIPCC__)
292
-
293
- #endif // _HIP_BFLOAT16_H_
@@ -1,32 +0,0 @@
1
- /*
2
- Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved.
3
-
4
- Permission is hereby granted, free of charge, to any person obtaining a copy of
5
- this software and associated documentation files (the "Software"), to deal in
6
- the Software without restriction, including without limitation the rights to
7
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8
- of the Software, and to permit persons to whom the Software is furnished to do
9
- so, subject to the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be included in all
12
- copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
- SOFTWARE.
21
- */
22
-
23
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H
24
- #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H
25
-
26
- #if defined(__clang__) && defined(__HIP__)
27
- #define __HIP_CLANG_ONLY__ 1
28
- #else
29
- #define __HIP_CLANG_ONLY__ 0
30
- #endif
31
-
32
- #endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H
@@ -1,174 +0,0 @@
1
- /*
2
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
3
-
4
- Permission is hereby granted, free of charge, to any person obtaining a copy
5
- of this software and associated documentation files (the "Software"), to deal
6
- in the Software without restriction, including without limitation the rights
7
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
- copies of the Software, and to permit persons to whom the Software is
9
- furnished to do so, subject to the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be included in
12
- all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
- THE SOFTWARE.
21
- */
22
-
23
- /* The header defines complex numbers and related functions*/
24
-
25
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H
26
- #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H
27
-
28
- #if !defined(__HIPCC_RTC__)
29
- #include "hip/amd_detail/amd_hip_vector_types.h"
30
- #endif
31
-
32
- #if defined(__HIPCC_RTC__)
33
- #define __HOST_DEVICE__ __device__
34
- #else
35
- #define __HOST_DEVICE__ __host__ __device__
36
- // TODO: Clang has a bug which allows device functions to call std functions
37
- // when std functions are introduced into default namespace by using statement.
38
- // math.h may be included after this bug is fixed.
39
- #if __cplusplus
40
- #include <cmath>
41
- #else
42
- #include "math.h"
43
- #endif
44
- #endif // !defined(__HIPCC_RTC__)
45
-
46
- typedef float2 hipFloatComplex;
47
-
48
- __HOST_DEVICE__ static inline float hipCrealf(hipFloatComplex z) { return z.x; }
49
-
50
- __HOST_DEVICE__ static inline float hipCimagf(hipFloatComplex z) { return z.y; }
51
-
52
- __HOST_DEVICE__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
53
- hipFloatComplex z;
54
- z.x = a;
55
- z.y = b;
56
- return z;
57
- }
58
-
59
- __HOST_DEVICE__ static inline hipFloatComplex hipConjf(hipFloatComplex z) {
60
- hipFloatComplex ret;
61
- ret.x = z.x;
62
- ret.y = -z.y;
63
- return ret;
64
- }
65
-
66
- __HOST_DEVICE__ static inline float hipCsqabsf(hipFloatComplex z) {
67
- return z.x * z.x + z.y * z.y;
68
- }
69
-
70
- __HOST_DEVICE__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
71
- return make_hipFloatComplex(p.x + q.x, p.y + q.y);
72
- }
73
-
74
- __HOST_DEVICE__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
75
- return make_hipFloatComplex(p.x - q.x, p.y - q.y);
76
- }
77
-
78
- __HOST_DEVICE__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
79
- return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y);
80
- }
81
-
82
- __HOST_DEVICE__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
83
- float sqabs = hipCsqabsf(q);
84
- hipFloatComplex ret;
85
- ret.x = (p.x * q.x + p.y * q.y) / sqabs;
86
- ret.y = (p.y * q.x - p.x * q.y) / sqabs;
87
- return ret;
88
- }
89
-
90
- __HOST_DEVICE__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); }
91
-
92
-
93
- typedef double2 hipDoubleComplex;
94
-
95
- __HOST_DEVICE__ static inline double hipCreal(hipDoubleComplex z) { return z.x; }
96
-
97
- __HOST_DEVICE__ static inline double hipCimag(hipDoubleComplex z) { return z.y; }
98
-
99
- __HOST_DEVICE__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
100
- hipDoubleComplex z;
101
- z.x = a;
102
- z.y = b;
103
- return z;
104
- }
105
-
106
- __HOST_DEVICE__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) {
107
- hipDoubleComplex ret;
108
- ret.x = z.x;
109
- ret.y = -z.y;
110
- return ret;
111
- }
112
-
113
- __HOST_DEVICE__ static inline double hipCsqabs(hipDoubleComplex z) {
114
- return z.x * z.x + z.y * z.y;
115
- }
116
-
117
- __HOST_DEVICE__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
118
- return make_hipDoubleComplex(p.x + q.x, p.y + q.y);
119
- }
120
-
121
- __HOST_DEVICE__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
122
- return make_hipDoubleComplex(p.x - q.x, p.y - q.y);
123
- }
124
-
125
- __HOST_DEVICE__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
126
- return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y);
127
- }
128
-
129
- __HOST_DEVICE__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
130
- double sqabs = hipCsqabs(q);
131
- hipDoubleComplex ret;
132
- ret.x = (p.x * q.x + p.y * q.y) / sqabs;
133
- ret.y = (p.y * q.x - p.x * q.y) / sqabs;
134
- return ret;
135
- }
136
-
137
- __HOST_DEVICE__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); }
138
-
139
- typedef hipFloatComplex hipComplex;
140
-
141
- __HOST_DEVICE__ static inline hipComplex make_hipComplex(float x, float y) {
142
- return make_hipFloatComplex(x, y);
143
- }
144
-
145
- __HOST_DEVICE__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
146
- return make_hipFloatComplex((float)z.x, (float)z.y);
147
- }
148
-
149
- __HOST_DEVICE__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
150
- return make_hipDoubleComplex((double)z.x, (double)z.y);
151
- }
152
-
153
- __HOST_DEVICE__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
154
- float real = (p.x * q.x) + r.x;
155
- float imag = (q.x * p.y) + r.y;
156
-
157
- real = -(p.y * q.y) + real;
158
- imag = (p.x * q.y) + imag;
159
-
160
- return make_hipComplex(real, imag);
161
- }
162
-
163
- __HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
164
- hipDoubleComplex r) {
165
- double real = (p.x * q.x) + r.x;
166
- double imag = (q.x * p.y) + r.y;
167
-
168
- real = -(p.y * q.y) + real;
169
- imag = (p.x * q.y) + imag;
170
-
171
- return make_hipDoubleComplex(real, imag);
172
- }
173
-
174
- #endif //HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H