triton-windows 3.4.0.post20__cp313-cp313-win_amd64.whl → 3.5.0.post21__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (107) hide show
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +8 -2
  3. triton/_filecheck.py +24 -14
  4. triton/_internal_testing.py +70 -4
  5. triton/_utils.py +3 -1
  6. triton/backends/amd/compiler.py +68 -60
  7. triton/backends/amd/driver.c +113 -44
  8. triton/backends/amd/driver.py +133 -57
  9. triton/backends/driver.py +13 -0
  10. triton/backends/nvidia/compiler.py +80 -22
  11. triton/backends/nvidia/driver.c +88 -15
  12. triton/backends/nvidia/driver.py +130 -123
  13. triton/compiler/__init__.py +5 -2
  14. triton/compiler/code_generator.py +270 -163
  15. triton/compiler/compiler.py +45 -62
  16. triton/experimental/gluon/__init__.py +3 -2
  17. triton/experimental/gluon/_runtime.py +9 -6
  18. triton/experimental/gluon/language/__init__.py +117 -16
  19. triton/experimental/gluon/language/_core.py +246 -68
  20. triton/experimental/gluon/language/_layouts.py +398 -45
  21. triton/experimental/gluon/language/_math.py +17 -9
  22. triton/experimental/gluon/language/_semantic.py +130 -37
  23. triton/experimental/gluon/language/_standard.py +55 -22
  24. triton/experimental/gluon/language/amd/__init__.py +4 -0
  25. triton/experimental/gluon/language/amd/_layouts.py +96 -0
  26. triton/experimental/gluon/language/amd/cdna3/__init__.py +100 -0
  27. triton/experimental/gluon/language/amd/cdna4/__init__.py +48 -0
  28. triton/experimental/gluon/language/amd/cdna4/async_copy.py +151 -0
  29. triton/experimental/gluon/language/extra/__init__.py +3 -0
  30. triton/experimental/gluon/language/nvidia/ampere/__init__.py +3 -0
  31. triton/experimental/gluon/language/nvidia/ampere/async_copy.py +74 -0
  32. triton/experimental/gluon/language/nvidia/ampere/mbarrier.py +80 -0
  33. triton/experimental/gluon/language/nvidia/blackwell/__init__.py +192 -7
  34. triton/experimental/gluon/language/nvidia/blackwell/tma.py +20 -0
  35. triton/experimental/gluon/language/nvidia/hopper/__init__.py +124 -3
  36. triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +20 -37
  37. triton/experimental/gluon/language/nvidia/hopper/tma.py +4 -3
  38. triton/experimental/gluon/nvidia/hopper.py +6 -1
  39. triton/knobs.py +132 -67
  40. triton/language/__init__.py +16 -10
  41. triton/language/core.py +163 -83
  42. triton/language/extra/cuda/gdc.py +6 -6
  43. triton/language/extra/hip/__init__.py +3 -1
  44. triton/language/extra/hip/libdevice.py +7 -0
  45. triton/language/extra/hip/utils.py +35 -0
  46. triton/language/extra/libdevice.py +4 -0
  47. triton/language/semantic.py +76 -23
  48. triton/language/standard.py +14 -14
  49. triton/language/target_info.py +54 -0
  50. triton/runtime/_allocation.py +15 -3
  51. triton/runtime/_async_compile.py +55 -0
  52. triton/runtime/autotuner.py +4 -5
  53. triton/runtime/build.py +11 -9
  54. triton/runtime/cache.py +44 -1
  55. triton/runtime/driver.py +16 -41
  56. triton/runtime/interpreter.py +31 -23
  57. triton/runtime/jit.py +318 -157
  58. triton/runtime/tcc/include/_mingw.h +8 -10
  59. triton/runtime/tcc/include/assert.h +5 -0
  60. triton/runtime/tcc/include/errno.h +1 -1
  61. triton/runtime/tcc/include/float.h +21 -3
  62. triton/runtime/tcc/include/iso646.h +36 -0
  63. triton/runtime/tcc/include/limits.h +5 -0
  64. triton/runtime/tcc/include/malloc.h +2 -2
  65. triton/runtime/tcc/include/math.h +21 -261
  66. triton/runtime/tcc/include/stdalign.h +16 -0
  67. triton/runtime/tcc/include/stdarg.h +5 -70
  68. triton/runtime/tcc/include/stdatomic.h +171 -0
  69. triton/runtime/tcc/include/stddef.h +7 -19
  70. triton/runtime/tcc/include/stdlib.h +15 -4
  71. triton/runtime/tcc/include/stdnoreturn.h +7 -0
  72. triton/runtime/tcc/include/sys/stat.h +2 -2
  73. triton/runtime/tcc/include/sys/types.h +5 -0
  74. triton/runtime/tcc/include/tcc/tcc_libm.h +444 -27
  75. triton/runtime/tcc/include/tccdefs.h +342 -0
  76. triton/runtime/tcc/include/tgmath.h +89 -0
  77. triton/runtime/tcc/include/uchar.h +33 -0
  78. triton/runtime/tcc/include/unistd.h +1 -0
  79. triton/runtime/tcc/include/winapi/qos.h +72 -0
  80. triton/runtime/tcc/include/winapi/shellapi.h +59 -0
  81. triton/runtime/tcc/include/winapi/winbase.h +9 -2
  82. triton/runtime/tcc/include/winapi/wincon.h +8 -0
  83. triton/runtime/tcc/include/winapi/windows.h +1 -1
  84. triton/runtime/tcc/include/winapi/winnls.h +778 -0
  85. triton/runtime/tcc/include/winapi/winnt.h +9 -7
  86. triton/runtime/tcc/include/winapi/winsock2.h +1474 -0
  87. triton/runtime/tcc/include/winapi/ws2ipdef.h +21 -0
  88. triton/runtime/tcc/include/winapi/ws2tcpip.h +391 -0
  89. triton/runtime/tcc/lib/libtcc1.a +0 -0
  90. triton/runtime/tcc/lib/python314.def +1800 -0
  91. triton/runtime/tcc/lib/python314t.def +1809 -0
  92. triton/runtime/tcc/libtcc.dll +0 -0
  93. triton/runtime/tcc/tcc.exe +0 -0
  94. triton/tools/compile.py +62 -14
  95. triton/tools/extra/cuda/compile.c +1 -0
  96. triton/tools/extra/hip/compile.cpp +66 -0
  97. triton/tools/extra/hip/compile.h +13 -0
  98. triton/tools/ragged_tma.py +92 -0
  99. triton/tools/tensor_descriptor.py +7 -9
  100. triton/windows_utils.py +42 -79
  101. {triton_windows-3.4.0.post20.dist-info → triton_windows-3.5.0.post21.dist-info}/METADATA +3 -4
  102. {triton_windows-3.4.0.post20.dist-info → triton_windows-3.5.0.post21.dist-info}/RECORD +106 -75
  103. triton/runtime/tcc/lib/libtcc1-64.a +0 -0
  104. {triton_windows-3.4.0.post20.dist-info → triton_windows-3.5.0.post21.dist-info}/WHEEL +0 -0
  105. {triton_windows-3.4.0.post20.dist-info → triton_windows-3.5.0.post21.dist-info}/entry_points.txt +0 -0
  106. {triton_windows-3.4.0.post20.dist-info → triton_windows-3.5.0.post21.dist-info}/licenses/LICENSE +0 -0
  107. {triton_windows-3.4.0.post20.dist-info → triton_windows-3.5.0.post21.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@
2
2
  #define _TCC_LIBM_H_
3
3
 
4
4
  #include "../math.h"
5
+ #include "../stdint.h"
5
6
 
6
7
  /* TCC uses 8 bytes for double and long double, so effectively the l variants
7
8
  * are never used. For now, they just run the normal (double) variant.
@@ -46,7 +47,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
46
47
  /* fpclassify */
47
48
 
48
49
  __CRT_INLINE int __cdecl __fpclassify (double x) {
49
- union {double f; uint64_t i;} u = {x};
50
+ union {double f; uint64_t i;} u = {.f = x};
50
51
  int e = u.i>>52 & 0x7ff;
51
52
  if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
52
53
  if (e==0x7ff) return u.i<<12 ? FP_NAN : FP_INFINITE;
@@ -54,7 +55,7 @@ __CRT_INLINE int __cdecl __fpclassify (double x) {
54
55
  }
55
56
 
56
57
  __CRT_INLINE int __cdecl __fpclassifyf (float x) {
57
- union {float f; uint32_t i;} u = {x};
58
+ union {float f; uint32_t i;} u = {.f = x};
58
59
  int e = u.i>>23 & 0xff;
59
60
  if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
60
61
  if (e==0xff) return u.i<<9 ? FP_NAN : FP_INFINITE;
@@ -69,13 +70,13 @@ __CRT_INLINE int __cdecl __fpclassifyl (long double x) {
69
70
  /* signbit */
70
71
 
71
72
  __CRT_INLINE int __cdecl __signbit (double x) {
72
- union {double d; uint64_t i;} y = { x };
73
- return y.i>>63;
73
+ union {double f; uint64_t i;} u = {.f = x};
74
+ return u.i>>63;
74
75
  }
75
76
 
76
77
  __CRT_INLINE int __cdecl __signbitf (float x) {
77
- union {float f; uint32_t i; } y = { x };
78
- return y.i>>31;
78
+ union {float f; uint32_t i; } u = {.f = x};
79
+ return u.i>>31;
79
80
  }
80
81
 
81
82
  __CRT_INLINE int __cdecl __signbitl (long double x) {
@@ -122,21 +123,13 @@ __CRT_INLINE long double __cdecl fmaxl (long double x, long double y) {
122
123
 
123
124
  /* *round* */
124
125
 
125
- #define TCCFP_FORCE_EVAL(x) do { \
126
- if (sizeof(x) == sizeof(float)) { \
127
- volatile float __x; \
128
- __x = (x); \
129
- } else if (sizeof(x) == sizeof(double)) { \
130
- volatile double __x; \
131
- __x = (x); \
132
- } else { \
133
- volatile long double __x; \
134
- __x = (x); \
135
- } \
126
+ #define TCCFP_FORCE_EVAL(x) do { \
127
+ volatile typeof(x) __x; \
128
+ __x = (x); \
136
129
  } while(0)
137
130
 
138
131
  __CRT_INLINE double __cdecl round (double x) {
139
- union {double f; uint64_t i;} u = {x};
132
+ union {double f; uint64_t i;} u = {.f = x};
140
133
  int e = u.i >> 52 & 0x7ff;
141
134
  double y;
142
135
 
@@ -150,15 +143,8 @@ __CRT_INLINE double __cdecl round (double x) {
150
143
  return 0*u.f;
151
144
  }
152
145
  y = (double)(x + 0x1p52) - 0x1p52 - x;
153
- if (y > 0.5)
154
- y = y + x - 1;
155
- else if (y <= -0.5)
156
- y = y + x + 1;
157
- else
158
- y = y + x;
159
- if (u.i >> 63)
160
- y = -y;
161
- return y;
146
+ y = y + x - (y > 0.5) + (y <= -0.5); /* branchless */
147
+ return (u.i >> 63) ? -y : y;
162
148
  }
163
149
 
164
150
  __CRT_INLINE long __cdecl lround (double x) {
@@ -194,8 +180,439 @@ __CRT_INLINE long long __cdecl llroundl (long double x) {
194
180
  }
195
181
 
196
182
 
183
+ /* MUSL asinh, acosh, atanh */
184
+
185
+ __CRT_INLINE double __cdecl asinh(double x) {
186
+ union {double f; uint64_t i;} u = {.f = x};
187
+ unsigned e = u.i >> 52 & 0x7ff, s = u.i >> 63;
188
+ u.i &= -1ull / 2, x = u.f;
189
+ if (e >= 0x3ff + 26) x = log(x) + 0.693147180559945309;
190
+ else if (e >= 0x3ff + 1) x = log(2*x + 1 / (sqrt(x*x + 1) + x)); /* |x|>=2 */
191
+ else if (e >= 0x3ff - 26) x = log1p(x + x*x / (sqrt(x*x + 1) + 1));
192
+ else TCCFP_FORCE_EVAL(x + 0x1p120f);
193
+ return s ? -x : x;
194
+ }
195
+
196
+ __CRT_INLINE double __cdecl acosh(double x) {
197
+ union {double f; uint64_t i;} u = {.f = x};
198
+ unsigned e = u.i >> 52 & 0x7ff;
199
+ if (e < 0x3ff + 1) return --x, log1p(x + sqrt(x*x + 2*x)); /* |x|<2 */
200
+ if (e < 0x3ff + 26) return log(2*x - 1 / (x + sqrt(x*x - 1)));
201
+ return log(x) + 0.693147180559945309;
202
+ }
203
+
204
+ __CRT_INLINE double __cdecl atanh(double x) {
205
+ union {double f; uint64_t i;} u = {.f = x};
206
+ unsigned e = u.i >> 52 & 0x7ff, s = u.i >> 63;
207
+ u.i &= -1ull / 2, x = u.f;
208
+ if (e < 0x3ff - 1) {
209
+ if (e < 0x3ff - 32) { if (e == 0) TCCFP_FORCE_EVAL((float)x); }
210
+ else x = 0.5 * log1p(2*x + 2*x*x / (1 - x)); /* |x| < 0.5 */
211
+ } else x = 0.5 * log1p(2*(x / (1 - x))); /* avoid overflow */
212
+ return s ? -x : x;
213
+ }
214
+
215
+ /* MUSL scalbn */
216
+
217
+ __CRT_INLINE double __cdecl scalbn(double x, int n) {
218
+ union {double f; uint64_t i;} u;
219
+ if (n > 1023) {
220
+ x *= 0x1p1023, n -= 1023;
221
+ if (n > 1023) {
222
+ x *= 0x1p1023, n -= 1023;
223
+ if (n > 1023) n = 1023;
224
+ }
225
+ } else if (n < -1022) {
226
+ x *= 0x1p-1022 * 0x1p53, n += 1022 - 53;
227
+ if (n < -1022) {
228
+ x *= 0x1p-1022 * 0x1p53, n += 1022 - 53;
229
+ if (n < -1022) n = -1022;
230
+ }
231
+ }
232
+ u.i = (0x3ffull + n) << 52;
233
+ return x * u.f;
234
+ }
235
+
236
+ /* MUSL: Override msvcrt frexp(): 4.5x speedup! */
237
+
238
+ __CRT_INLINE double __cdecl frexp(double x, int *e) {
239
+ union {double f; uint64_t i;} u = {.f = x};
240
+ int ee = u.i>>52 & 0x7ff;
241
+ if (!ee) {
242
+ if (x) x = frexp(x*0x1p64, e), *e -= 64;
243
+ else *e = 0;
244
+ return x;
245
+ } else if (ee == 0x7ff)
246
+ return x;
247
+ *e = ee - 0x3fe;
248
+ u.i &= 0x800fffffffffffffull;
249
+ u.i |= 0x3fe0000000000000ull;
250
+ return u.f;
251
+ }
252
+
253
+ /* MUSL nan */
254
+
255
+ __CRT_INLINE double __cdecl nan(const char* s) {
256
+ return NAN;
257
+ }
258
+ __CRT_INLINE float __cdecl nanf(const char* s) {
259
+ return NAN;
260
+ }
261
+ __CRT_INLINE long double __cdecl nanl(const char* s) {
262
+ return NAN;
263
+ }
264
+
265
+
197
266
  /*******************************************************************************
198
267
  End of code based on MUSL
199
268
  *******************************************************************************/
200
269
 
270
+
271
+ /* Following are math functions missing from msvcrt.dll, and not defined
272
+ * in math.h or above. Functions still remaining:
273
+ * remquo(), remainder(), fma(), erf(), erfc(), nearbyint().
274
+ * In <stdlib.h>: lldiv().
275
+ */
276
+
277
+ __CRT_INLINE float __cdecl scalbnf(float x, int n) {
278
+ return scalbn(x, n);
279
+ }
280
+ __CRT_INLINE long double __cdecl scalbnl(long double x, int n) {
281
+ return scalbn(x, n);
282
+ }
283
+
284
+ __CRT_INLINE double __cdecl scalbln(double x, long n) {
285
+ return scalbn(x, n);
286
+ }
287
+ __CRT_INLINE float __cdecl scalblnf(float x, long n) {
288
+ return scalbn(x, n);
289
+ }
290
+ __CRT_INLINE long double __cdecl scalblnl(long double x, long n) {
291
+ return scalbn(x, n);
292
+ }
293
+
294
+ /* Override msvcrt ldexp(): 7.3x speedup! */
295
+
296
+ __CRT_INLINE double __cdecl ldexp(double x, int expn) {
297
+ return scalbn(x, expn);
298
+ }
299
+ __CRT_INLINE float __cdecl ldexpf(float x, int expn) {
300
+ return scalbn(x, expn);
301
+ }
302
+ __CRT_INLINE long double __cdecl ldexpl(long double x, int expn) {
303
+ return scalbn(x, expn);
304
+ }
305
+
306
+ __CRT_INLINE float __cdecl frexpf(float x, int *y) {
307
+ return frexp(x, y);
308
+ }
309
+ __CRT_INLINE long double __cdecl frexpl (long double x, int* y) {
310
+ return frexp(x, y);
311
+ }
312
+
313
+
314
+ __CRT_INLINE double __cdecl rint(double x) {
315
+ double retval;
316
+ __asm__ (
317
+ "fldl %1\n"
318
+ "frndint \n"
319
+ "fstpl %0\n" : "=m" (retval) : "m" (x));
320
+ return retval;
321
+ }
322
+
323
+ __CRT_INLINE float __cdecl rintf(float x) {
324
+ float retval;
325
+ __asm__ (
326
+ "flds %1\n"
327
+ "frndint \n"
328
+ "fstps %0\n" : "=m" (retval) : "m" (x));
329
+ return retval;
330
+ }
331
+ __CRT_INLINE long double __cdecl rintl (long double x) {
332
+ return rint(x);
333
+ }
334
+
335
+
336
+ /* 7.12.9.5 */
337
+ __CRT_INLINE long __cdecl lrint(double x) {
338
+ long retval;
339
+ __asm__ __volatile__
340
+ ("fldl %1\n"
341
+ "fistpl %0" : "=m" (retval) : "m" (x));
342
+ return retval;
343
+ }
344
+
345
+ __CRT_INLINE long __cdecl lrintf(float x) {
346
+ long retval;
347
+ __asm__ __volatile__
348
+ ("flds %1\n"
349
+ "fistpl %0" : "=m" (retval) : "m" (x));
350
+ return retval;
351
+ }
352
+
353
+ __CRT_INLINE long __cdecl lrintl (long double x) {
354
+ return lrint(x);
355
+ }
356
+
357
+
358
+ __CRT_INLINE long long __cdecl llrint(double x) {
359
+ long long retval;
360
+ __asm__ __volatile__
361
+ ("fldl %1\n"
362
+ "fistpll %0" : "=m" (retval) : "m" (x));
363
+ return retval;
364
+ }
365
+
366
+ __CRT_INLINE long long __cdecl llrintf(float x) {
367
+ long long retval;
368
+ __asm__ __volatile__
369
+ ("flds %1\n"
370
+ "fistpll %0" : "=m" (retval) : "m" (x));
371
+ return retval;
372
+ }
373
+
374
+ __CRT_INLINE long long __cdecl llrintl (long double x) {
375
+ return llrint(x);
376
+ }
377
+
378
+
379
+ __CRT_INLINE double __cdecl trunc(double _x) {
380
+ double retval;
381
+ unsigned short saved_cw;
382
+ unsigned short tmp_cw;
383
+ __asm__ ("fnstcw %0;" : "=m" (saved_cw)); /* save FPU control word */
384
+ tmp_cw = (saved_cw & ~(FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO))
385
+ | FE_TOWARDZERO;
386
+ __asm__ ("fldcw %0;" : : "m" (tmp_cw));
387
+ __asm__ ("fldl %1;"
388
+ "frndint;"
389
+ "fstpl %0;" : "=m" (retval) : "m" (_x)); /* round towards zero */
390
+ __asm__ ("fldcw %0;" : : "m" (saved_cw) ); /* restore saved control word */
391
+ return retval;
392
+ }
393
+
394
+ __CRT_INLINE float __cdecl truncf(float x) {
395
+ return (float) ((intptr_t) x);
396
+ }
397
+ __CRT_INLINE long double __cdecl truncl(long double x) {
398
+ return trunc(x);
399
+ }
400
+
401
+
402
+ __CRT_INLINE long double __cdecl nextafterl(long double x, long double to) {
403
+ return nextafter(x, to);
404
+ }
405
+
406
+ __CRT_INLINE double __cdecl nexttoward(double x, long double to) {
407
+ return nextafter(x, to);
408
+ }
409
+ __CRT_INLINE float __cdecl nexttowardf(float x, long double to) {
410
+ return nextafterf(x, to);
411
+ }
412
+ __CRT_INLINE long double __cdecl nexttowardl(long double x, long double to) {
413
+ return nextafter(x, to);
414
+ }
415
+
416
+ /* Override msvcrt fabs(): 6.3x speedup! */
417
+
418
+ __CRT_INLINE double __cdecl fabs(double x) {
419
+ return x < 0 ? -x : x;
420
+ }
421
+ __CRT_INLINE float __cdecl fabsf(float x) {
422
+ return x < 0 ? -x : x;
423
+ }
424
+ __CRT_INLINE long double __cdecl fabsl(long double x) {
425
+ return x < 0 ? -x : x;
426
+ }
427
+
428
+
429
+ #if defined(_WIN32) && !defined(_WIN64) && !defined(__ia64__)
430
+ __CRT_INLINE float acosf(float x) { return acos(x); }
431
+ __CRT_INLINE float asinf(float x) { return asin(x); }
432
+ __CRT_INLINE float atanf(float x) { return atan(x); }
433
+ __CRT_INLINE float atan2f(float x, float y) { return atan2(x, y); }
434
+ __CRT_INLINE float ceilf(float x) { return ceil(x); }
435
+ __CRT_INLINE float cosf(float x) { return cos(x); }
436
+ __CRT_INLINE float coshf(float x) { return cosh(x); }
437
+ __CRT_INLINE float expf(float x) { return exp(x); }
438
+ __CRT_INLINE float floorf(float x) { return floor(x); }
439
+ __CRT_INLINE float fmodf(float x, float y) { return fmod(x, y); }
440
+ __CRT_INLINE float logf(float x) { return log(x); }
441
+ __CRT_INLINE float logbf(float x) { return logb(x); }
442
+ __CRT_INLINE float log10f(float x) { return log10(x); }
443
+ __CRT_INLINE float modff(float x, float *y) { double di, df = modf(x, &di); *y = di; return df; }
444
+ __CRT_INLINE float powf(float x, float y) { return pow(x, y); }
445
+ __CRT_INLINE float sinf(float x) { return sin(x); }
446
+ __CRT_INLINE float sinhf(float x) { return sinh(x); }
447
+ __CRT_INLINE float sqrtf(float x) { return sqrt(x); }
448
+ __CRT_INLINE float tanf(float x) { return tan(x); }
449
+ __CRT_INLINE float tanhf(float x) { return tanh(x); }
450
+ #endif
451
+ __CRT_INLINE float __cdecl asinhf(float x) { return asinh(x); }
452
+ __CRT_INLINE float __cdecl acoshf(float x) { return acosh(x); }
453
+ __CRT_INLINE float __cdecl atanhf(float x) { return atanh(x); }
454
+
455
+ __CRT_INLINE long double __cdecl asinhl(long double x) { return asinh(x); }
456
+ __CRT_INLINE long double __cdecl acoshl(long double x) { return acosh(x); }
457
+ __CRT_INLINE long double __cdecl atanhl(long double x) { return atanh(x); }
458
+ __CRT_INLINE long double __cdecl asinl(long double x) { return asin(x); }
459
+ __CRT_INLINE long double __cdecl acosl(long double x) { return acos(x); }
460
+ __CRT_INLINE long double __cdecl atanl(long double x) { return atan(x); }
461
+ __CRT_INLINE long double __cdecl ceill(long double x) { return ceil(x); }
462
+ __CRT_INLINE long double __cdecl coshl(long double x) { return cosh(x); }
463
+ __CRT_INLINE long double __cdecl cosl(long double x) { return cos(x); }
464
+ __CRT_INLINE long double __cdecl expl(long double x) { return exp(x); }
465
+ __CRT_INLINE long double __cdecl floorl(long double x) { return floor(x); }
466
+ __CRT_INLINE long double __cdecl fmodl(long double x, long double y) { return fmod(x, y); }
467
+ __CRT_INLINE long double __cdecl hypotl(long double x, long double y) { return hypot(x, y); }
468
+ __CRT_INLINE long double __cdecl logl(long double x) { return log(x); }
469
+ __CRT_INLINE long double __cdecl logbl(long double x) { return logb(x); }
470
+ __CRT_INLINE long double __cdecl log10l(long double x) { return log10(x); }
471
+ __CRT_INLINE long double __cdecl modfl(long double x, long double* y) { double y1 = *y; x = modf(x, &y1); *y = y1; return x; }
472
+ __CRT_INLINE long double __cdecl powl(long double x, long double y) { return pow(x, y); }
473
+ __CRT_INLINE long double __cdecl sinhl(long double x) { return sinh(x); }
474
+ __CRT_INLINE long double __cdecl sinl(long double x) { return sin(x); }
475
+ __CRT_INLINE long double __cdecl sqrtl(long double x) { return sqrt(x); }
476
+ __CRT_INLINE long double __cdecl tanhl(long double x) { return tanh(x); }
477
+ __CRT_INLINE long double __cdecl tanl(long double x) { return tan(x); }
478
+
479
+ /* Following are accurate, but much shorter implementations than MUSL lib. */
480
+
481
+ __CRT_INLINE double __cdecl log1p(double x) {
482
+ double u = 1.0 + x;
483
+ return u == 1.0 ? x : log(u)*(x / (u - 1.0));
484
+ }
485
+ __CRT_INLINE float __cdecl log1pf(float x) {
486
+ float u = 1.0f + x;
487
+ return u == 1.0f ? x : logf(u)*(x / (u - 1.0f));
488
+ }
489
+ __CRT_INLINE long double __cdecl log1pl(long double x) {
490
+ return log1p(x);
491
+ }
492
+
493
+
494
+ __CRT_INLINE double __cdecl expm1(double x) {
495
+ if (x > 0.0024 || x < -0.0024) return exp(x) - 1.0;
496
+ return x*(1.0 + 0.5*x*(1.0 + (1/3.0)*x*(1.0 + 0.25*x*(1.0 + 0.2*x))));
497
+ }
498
+ __CRT_INLINE float __cdecl expm1f(float x) {
499
+ if (x > 0.085f || x < -0.085f) return expf(x) - 1.0f;
500
+ return x*(1.0f + 0.5f*x*(1.0f + (1/3.0f)*x*(1.0f + 0.25f*x)));
501
+ }
502
+ __CRT_INLINE long double __cdecl expm1l(long double x) {
503
+ return expm1(x);
504
+ }
505
+
506
+
507
+ __CRT_INLINE double __cdecl cbrt(double x) {
508
+ return x < 0.0 ? -pow(-x, 1/3.0) : pow(x, 1/3.0);
509
+ }
510
+ __CRT_INLINE float __cdecl cbrtf(float x) {
511
+ return x < 0.0f ? -pow(-x, 1/3.0) : pow(x, 1/3.0);
512
+ }
513
+ __CRT_INLINE long double __cdecl cbrtl(long double x) {
514
+ return cbrt(x);
515
+ }
516
+
517
+
518
+ __CRT_INLINE double __cdecl log2(double x) {
519
+ return log(x) * 1.442695040888963407;
520
+ }
521
+ __CRT_INLINE float __cdecl log2f(float x) {
522
+ return log(x) * 1.442695040888963407;
523
+ }
524
+ __CRT_INLINE long double __cdecl log2l(long double x) {
525
+ return log(x) * 1.442695040888963407;
526
+ }
527
+
528
+
529
+ __CRT_INLINE double __cdecl exp2(double x) {
530
+ return exp(x * 0.693147180559945309);
531
+ }
532
+ __CRT_INLINE float __cdecl exp2f(float x) {
533
+ return exp(x * 0.693147180559945309);
534
+ }
535
+ __CRT_INLINE long double __cdecl exp2l(long double x) {
536
+ return exp(x * 0.693147180559945309);
537
+ }
538
+
539
+
540
+ __CRT_INLINE int __cdecl ilogb(double x) {
541
+ return (int) logb(x);
542
+ }
543
+ __CRT_INLINE int __cdecl ilogbf(float x) {
544
+ return (int) logbf(x);
545
+ }
546
+ __CRT_INLINE int __cdecl ilogbl(long double x) {
547
+ return (int) logb(x);
548
+ }
549
+
550
+
551
+ __CRT_INLINE double __cdecl fdim(double x, double y) {
552
+ if (isnan(x) || isnan(y)) return NAN;
553
+ return x > y ? x - y : 0;
554
+ }
555
+ __CRT_INLINE float __cdecl fdimf(float x, float y) {
556
+ if (isnan(x) || isnan(y)) return NAN;
557
+ return x > y ? x - y : 0;
558
+ }
559
+ __CRT_INLINE long double __cdecl fdiml(long double x, long double y) {
560
+ if (isnan(x) || isnan(y)) return NAN;
561
+ return x > y ? x - y : 0;
562
+ }
563
+
564
+
565
+ /* tgamma and lgamma: Lanczos approximation
566
+ * https://rosettacode.org/wiki/Gamma_function
567
+ * https://www.johndcook.com/blog/cpp_gamma
568
+ */
569
+
570
+ __CRT_INLINE double __cdecl tgamma(double x) {
571
+ double m = 1.0, t = 3.14159265358979323;
572
+ if (x == floor(x)) {
573
+ if (x == 0) return INFINITY;
574
+ if (x < 0) return NAN;
575
+ if (x < 26) { for (double k = 2; k < x; ++k) m *= k; return m; }
576
+ }
577
+ if (x < 0.5)
578
+ return t / (sin(t*x)*tgamma(1.0 - x));
579
+ if (x > 12.0)
580
+ return exp(lgamma(x));
581
+
582
+ static const double c[8] = {676.5203681218851, -1259.1392167224028,
583
+ 771.32342877765313, -176.61502916214059,
584
+ 12.507343278686905, -0.13857109526572012,
585
+ 9.9843695780195716e-6, 1.5056327351493116e-7};
586
+ m = 0.99999999999980993, t = x + 6.5; /* x-1+8-.5 */
587
+ for (int k = 0; k < 8; ++k) m += c[k] / (x + k);
588
+ return 2.50662827463100050 * pow(t, x - 0.5)*exp(-t)*m; /* C=sqrt(2pi) */
589
+ }
590
+
591
+
592
+ __CRT_INLINE double __cdecl lgamma(double x) {
593
+ if (x < 12.0) {
594
+ if (x <= 0.0 && x == floor(x)) return INFINITY;
595
+ x = tgamma(x);
596
+ return log(x < 0.0 ? -x : x);
597
+ }
598
+ static const double c[7] = {1.0/12.0, -1.0/360.0, 1.0/1260.0, -1.0/1680.0,
599
+ 1.0/1188.0, -691.0/360360.0, 1.0/156.0};
600
+ double m = -3617.0/122400.0, t = 1.0 / (x*x);
601
+ for (int k = 6; k >= 0; --k) m = m*t + c[k];
602
+ return (x - 0.5)*log(x) - x + 0.918938533204672742 + m / x; /* C=log(2pi)/2 */
603
+ }
604
+
605
+ __CRT_INLINE float __cdecl tgammaf(float x) {
606
+ return tgamma(x);
607
+ }
608
+ __CRT_INLINE float __cdecl lgammaf(float x) {
609
+ return lgamma(x);
610
+ }
611
+ __CRT_INLINE long double __cdecl tgammal(long double x) {
612
+ return tgamma(x);
613
+ }
614
+ __CRT_INLINE long double __cdecl lgammal(long double x) {
615
+ return lgamma(x);
616
+ }
617
+
201
618
  #endif /* _TCC_LIBM_H_ */