triton-windows 3.2.0.post12__cp39-cp39-win_amd64.whl → 3.3.0a0.post12__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (68)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +3 -3
  3. triton/_internal_testing.py +59 -4
  4. triton/_utils.py +35 -0
  5. triton/backends/amd/compiler.py +121 -74
  6. triton/backends/amd/driver.py +77 -43
  7. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +28 -49
  8. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +35 -9
  9. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +761 -284
  10. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +9 -3
  11. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +1391 -0
  12. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +3 -3
  13. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +44 -0
  14. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +288 -0
  15. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +110 -14
  16. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +504 -103
  17. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +2 -1
  18. triton/backends/amd/include/hip/amd_detail/host_defines.h +4 -0
  19. triton/backends/amd/include/hip/hip_ext.h +4 -2
  20. triton/backends/amd/include/hip/hip_fp8.h +33 -0
  21. triton/backends/amd/include/hip/hip_runtime_api.h +375 -33
  22. triton/backends/amd/include/hip/hip_version.h +3 -3
  23. triton/backends/amd/include/hip/hiprtc.h +25 -25
  24. triton/backends/amd/include/hsa/amd_hsa_elf.h +40 -14
  25. triton/backends/amd/include/hsa/hsa.h +11 -2
  26. triton/backends/amd/include/hsa/hsa_api_trace.h +30 -17
  27. triton/backends/amd/include/hsa/hsa_api_trace_version.h +68 -0
  28. triton/backends/amd/include/hsa/hsa_ext_amd.h +83 -27
  29. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +46 -46
  30. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +416 -0
  31. triton/backends/amd/include/roctracer/hip_ostream_ops.h +84 -4
  32. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +260 -0
  33. triton/backends/amd/include/roctracer/hsa_prof_str.h +51 -19
  34. triton/backends/amd/lib/asanrtl.bc +0 -0
  35. triton/backends/compiler.py +25 -225
  36. triton/backends/driver.py +7 -2
  37. triton/backends/nvidia/bin/ptxas.exe +0 -0
  38. triton/backends/nvidia/compiler.py +135 -90
  39. triton/backends/nvidia/driver.c +0 -1
  40. triton/backends/nvidia/driver.py +135 -49
  41. triton/backends/nvidia/include/cuda.h +2162 -241
  42. triton/backends/nvidia/lib/x64/cuda.lib +0 -0
  43. triton/compiler/__init__.py +2 -2
  44. triton/compiler/code_generator.py +334 -231
  45. triton/compiler/compiler.py +77 -66
  46. triton/language/__init__.py +22 -5
  47. triton/language/core.py +448 -74
  48. triton/language/extra/cuda/_experimental_tma.py +3 -5
  49. triton/language/math.py +1 -1
  50. triton/language/random.py +2 -1
  51. triton/language/semantic.py +206 -52
  52. triton/language/standard.py +35 -18
  53. triton/runtime/_allocation.py +32 -0
  54. triton/runtime/autotuner.py +27 -32
  55. triton/runtime/build.py +1 -48
  56. triton/runtime/cache.py +6 -6
  57. triton/runtime/errors.py +10 -0
  58. triton/runtime/interpreter.py +179 -45
  59. triton/runtime/jit.py +149 -190
  60. triton/testing.py +39 -11
  61. triton/tools/compile.py +27 -20
  62. triton/tools/{compile.c → extra/cuda/compile.c} +1 -0
  63. triton/tools/mxfp.py +301 -0
  64. {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/METADATA +5 -2
  65. {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/RECORD +68 -59
  66. {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/top_level.txt +2 -0
  67. triton/tools/{compile.h → extra/cuda/compile.h} +0 -0
  68. {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/WHEEL +0 -0
@@ -34,6 +34,7 @@ enum HipVdiOpId {
34
34
  // Types of ROCclr commands
35
35
  enum HipVdiCommandKind {
36
36
  kHipVdiCommandKernel = 0x11F0,
37
+ kHipVdiCommandTask = 0x11F1,
37
38
  kHipVdiMemcpyDeviceToHost = 0x11F3,
38
39
  kHipHipVdiMemcpyHostToDevice = 0x11F4,
39
40
  kHipVdiMemcpyDeviceToDevice = 0x11F5,
@@ -41,7 +42,7 @@ enum HipVdiCommandKind {
41
42
  kHipVdiMemcpyHostToDeviceRect = 0x1202,
42
43
  kHipVdiMemcpyDeviceToDeviceRect = 0x1203,
43
44
  kHipVdiFillMemory = 0x1207,
44
- };
45
+ };
45
46
 
46
47
  /**
47
48
  * @brief Initializes activity callback
@@ -127,6 +127,10 @@ template<typename _Tp>
127
127
  struct is_trivial
128
128
  : public integral_constant<bool, __is_trivial(_Tp)>
129
129
  { };
130
+
131
+
132
+ template <bool B, class T, class F> struct conditional { using type = T; };
133
+ template <class T, class F> struct conditional<false, T, F> { using type = F; };
130
134
  }
131
135
  typedef __hip_internal::uint8_t __hip_uint8_t;
132
136
  typedef __hip_internal::uint16_t __hip_uint16_t;
@@ -64,6 +64,8 @@ THE SOFTWARE.
64
64
  * Currently, timing between startEvent and stopEvent does not include the time it takes to perform
65
65
  * a system scope release/cache flush - only the time it takes to issues writes to cache.
66
66
  *
67
+ * @note For this HIP API, the flag 'hipExtAnyOrderLaunch' is not supported on AMD GFX9xx boards.
68
+ *
67
69
  */
68
70
  HIP_PUBLIC_API
69
71
  extern "C" hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
@@ -78,6 +80,7 @@ HIP_PUBLIC_API
78
80
  * @brief This HIP API is deprecated, please use hipExtModuleLaunchKernel() instead.
79
81
  *
80
82
  */
83
+ DEPRECATED("use hipExtModuleLaunchKernel instead")
81
84
  HIP_PUBLIC_API
82
85
  extern "C" hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
83
86
  uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
@@ -85,8 +88,7 @@ extern "C" hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalW
85
88
  uint32_t localWorkSizeZ, size_t sharedMemBytes,
86
89
  hipStream_t hStream, void** kernelParams, void** extra,
87
90
  hipEvent_t startEvent __dparm(NULL),
88
- hipEvent_t stopEvent __dparm(NULL))
89
- __attribute__((deprecated("use hipExtModuleLaunchKernel instead")));
91
+ hipEvent_t stopEvent __dparm(NULL));
90
92
 
91
93
  #if defined(__cplusplus)
92
94
 
@@ -0,0 +1,33 @@
1
+ /*
2
+ Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in
12
+ all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
+ THE SOFTWARE.
21
+ */
22
+
23
+ #ifndef HIP_INCLUDE_HIP_HIP_FP8_H
24
+ #define HIP_INCLUDE_HIP_HIP_FP8_H
25
+
26
+ #include <hip/hip_common.h>
27
+
28
+ #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
29
+ // We only have fnuz defs for now, which are not supported by other platforms
30
+ #include <hip/amd_detail/amd_hip_fp8.h>
31
+ #endif
32
+
33
+ #endif // HIP_INCLUDE_HIP_HIP_FP8_H