triton-windows 3.2.0.post18__cp310-cp310-win_amd64.whl → 3.2.0.post21__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (111)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/backends/amd/driver.py +6 -1
  3. triton/backends/nvidia/compiler.py +1 -3
  4. triton/backends/nvidia/driver.c +1 -0
  5. triton/backends/nvidia/driver.py +8 -3
  6. triton/runtime/autotuner.py +2 -2
  7. triton/runtime/build.py +14 -6
  8. triton/runtime/tcc/lib/python310.def +1610 -0
  9. triton/runtime/tcc/lib/python311.def +1633 -0
  10. triton/runtime/tcc/lib/python312.def +1703 -0
  11. triton/runtime/tcc/lib/python313.def +1651 -0
  12. triton/runtime/tcc/lib/python313t.def +1656 -0
  13. triton/runtime/tcc/lib/python39.def +1644 -0
  14. triton/runtime/tcc/lib/python3t.def +905 -0
  15. triton/windows_utils.py +11 -4
  16. {triton_windows-3.2.0.post18.dist-info → triton_windows-3.2.0.post21.dist-info}/METADATA +1 -1
  17. {triton_windows-3.2.0.post18.dist-info → triton_windows-3.2.0.post21.dist-info}/RECORD +19 -104
  18. {triton_windows-3.2.0.post18.dist-info → triton_windows-3.2.0.post21.dist-info}/WHEEL +1 -1
  19. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  20. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1031
  21. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1612
  22. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1337
  23. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  24. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  25. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  26. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -829
  27. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  28. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  29. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  30. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  31. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  32. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  33. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  34. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  35. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  36. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -494
  37. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  38. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  39. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  40. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  41. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  42. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  43. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  44. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1350
  45. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  46. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  47. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  48. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  49. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  50. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10169
  51. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -77
  52. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -180
  53. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  54. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  55. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  56. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  57. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  58. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  59. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  60. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  61. triton/backends/amd/include/hip/device_functions.h +0 -38
  62. triton/backends/amd/include/hip/driver_types.h +0 -468
  63. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  64. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  65. triton/backends/amd/include/hip/hip_common.h +0 -100
  66. triton/backends/amd/include/hip/hip_complex.h +0 -38
  67. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  68. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  69. triton/backends/amd/include/hip/hip_ext.h +0 -159
  70. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  71. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  72. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  73. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  74. triton/backends/amd/include/hip/hip_profile.h +0 -27
  75. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  76. triton/backends/amd/include/hip/hip_runtime_api.h +0 -8919
  77. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  78. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  79. triton/backends/amd/include/hip/hip_version.h +0 -17
  80. triton/backends/amd/include/hip/hiprtc.h +0 -421
  81. triton/backends/amd/include/hip/library_types.h +0 -78
  82. triton/backends/amd/include/hip/math_functions.h +0 -42
  83. triton/backends/amd/include/hip/surface_types.h +0 -63
  84. triton/backends/amd/include/hip/texture_types.h +0 -194
  85. triton/backends/amd/include/hsa/Brig.h +0 -1131
  86. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  87. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -436
  88. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  89. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  90. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  91. triton/backends/amd/include/hsa/hsa.h +0 -5729
  92. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  93. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -566
  94. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3090
  95. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  96. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  97. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  98. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  99. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  100. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4435
  101. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1467
  102. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3027
  103. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  104. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  105. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  106. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  107. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  108. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  109. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  110. triton/backends/amd/include/roctracer/roctx.h +0 -229
  111. {triton_windows-3.2.0.post18.dist-info → triton_windows-3.2.0.post21.dist-info}/top_level.txt +0 -0
triton/_C/libtriton.pyd CHANGED
Binary file
triton/backends/amd/driver.py CHANGED
@@ -362,11 +362,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
362
362
  if (!PyLong_Check(ret)) {{
363
363
  PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
364
364
  ptr_info.valid = false;
365
+ Py_DECREF(ret);
365
366
  return ptr_info;
366
367
  }}
367
368
  ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
368
- if(!ptr_info.dev_ptr)
369
+ if(!ptr_info.dev_ptr) {{
370
+ Py_DECREF(ret);
369
371
  return ptr_info;
372
+ }}
370
373
  uint64_t dev_ptr;
371
374
  hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
372
375
  if (status == hipErrorInvalidValue) {{
@@ -410,6 +413,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
410
413
  Py_DECREF(args);
411
414
  if (!ret)
412
415
  return NULL;
416
+ Py_DECREF(ret);
413
417
  }}
414
418
 
415
419
 
@@ -423,6 +427,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
423
427
  Py_DECREF(args);
424
428
  if (!ret)
425
429
  return NULL;
430
+ Py_DECREF(ret);
426
431
  }}
427
432
 
428
433
  if(PyErr_Occurred()) {{
triton/backends/nvidia/compiler.py CHANGED
@@ -122,7 +122,7 @@ class CUDAOptions:
122
122
  cluster_dims: tuple = (1, 1, 1)
123
123
  ptx_version: int = None
124
124
  enable_fp_fusion: bool = True
125
- supported_fp8_dtypes: Tuple[str] = ("fp8e5", "fp8e4b15")
125
+ supported_fp8_dtypes: Tuple[str] = ("fp8e4nv", "fp8e5", "fp8e4b15")
126
126
  deprecated_fp8_dtypes: Tuple[str] = ()
127
127
  default_dot_input_precision: str = "tf32"
128
128
  allowed_dot_input_precisions: Tuple[str] = ("tf32", "tf32x3", "ieee")
@@ -164,8 +164,6 @@ class CUDABackend(BaseBackend):
164
164
  args = {k: opts[k] for k in CUDAOptions.__dataclass_fields__.keys() if k in opts}
165
165
  if "supported_fp8_dtypes" not in args:
166
166
  supported_fp8_dtypes = set(CUDAOptions.supported_fp8_dtypes)
167
- if self.capability >= 89:
168
- supported_fp8_dtypes.add("fp8e4nv")
169
167
  args["supported_fp8_dtypes"] = tuple(sorted(supported_fp8_dtypes))
170
168
 
171
169
  if "deprecated_fp8_dtypes" not in args:
triton/backends/nvidia/driver.c CHANGED
@@ -1,3 +1,4 @@
1
+ #define _CRT_SECURE_NO_WARNINGS
1
2
  #include "cuda.h"
2
3
 
3
4
  #ifndef _WIN32
triton/backends/nvidia/driver.py CHANGED
@@ -181,6 +181,7 @@ def make_launcher(constants, signature, ids):
181
181
  else:
182
182
  params_decl = "void **params = NULL;"
183
183
  src = f"""
184
+ #define _CRT_SECURE_NO_WARNINGS
184
185
  #include \"cuda.h\"
185
186
  #include <stdbool.h>
186
187
  #define PY_SSIZE_T_CLEAN
@@ -313,11 +314,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
313
314
  if (!PyLong_Check(ret)) {{
314
315
  PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
315
316
  ptr_info.valid = false;
317
+ Py_DECREF(ret);
316
318
  return ptr_info;
317
319
  }}
318
320
  ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret);
319
- if(!ptr_info.dev_ptr)
321
+ if(!ptr_info.dev_ptr) {{
322
+ Py_DECREF(ret);
320
323
  return ptr_info;
324
+ }}
321
325
  uint64_t dev_ptr;
322
326
  int status = cuPointerGetAttribute(&dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
323
327
  if (status == CUDA_ERROR_INVALID_VALUE) {{
@@ -329,7 +333,7 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
329
333
  ptr_info.valid = false;
330
334
  }}
331
335
  ptr_info.dev_ptr = dev_ptr;
332
- Py_DECREF(ret); // Thanks ChatGPT!
336
+ Py_DECREF(ret);
333
337
  return ptr_info;
334
338
  }}
335
339
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
@@ -426,6 +430,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
426
430
  Py_DECREF(args);
427
431
  if (!ret)
428
432
  return NULL;
433
+ Py_DECREF(ret);
429
434
  }}
430
435
 
431
436
  // raise exception asap
@@ -444,7 +449,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
444
449
  Py_DECREF(args);
445
450
  if (!ret)
446
451
  return NULL;
447
-
452
+ Py_DECREF(ret);
448
453
  }}
449
454
 
450
455
  // return None
triton/runtime/autotuner.py CHANGED
@@ -182,9 +182,9 @@ class Autotuner(KernelInterface):
182
182
  # prune configs
183
183
  used_cached_result = False
184
184
  pruned_configs = self.prune_configs(kwargs)
185
- bench_start = time.time()
185
+ bench_start = time.perf_counter()
186
186
  timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
187
- bench_end = time.time()
187
+ bench_end = time.perf_counter()
188
188
  self.bench_time = bench_end - bench_start
189
189
  self.cache[key] = builtins.min(timings, key=timings.get)
190
190
  full_nargs = {**self.nargs, **kwargs, **self.cache[key].all_kwargs()}
triton/runtime/build.py CHANGED
@@ -49,6 +49,11 @@ def is_msvc(cc):
49
49
  return cc == "cl" or cc == "cl.exe"
50
50
 
51
51
 
52
+ def is_clang(cc):
53
+ cc = os.path.basename(cc).lower()
54
+ return cc == "clang" or cc == "clang.exe"
55
+
56
+
52
57
  def _cc_cmd(cc, src, out, include_dirs, library_dirs, libraries):
53
58
  if is_msvc(cc):
54
59
  out_base = os.path.splitext(out)[0]
@@ -63,7 +68,10 @@ def _cc_cmd(cc, src, out, include_dirs, library_dirs, libraries):
63
68
  cc_cmd += [f"/PDB:{out_base + '.pdb'}"]
64
69
  else:
65
70
  # for -Wno-psabi, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111047
66
- cc_cmd = [cc, src, "-O3", "-shared", "-fPIC", "-Wno-psabi", "-o", out]
71
+ cc_cmd = [cc, src, "-O3", "-shared", "-Wno-psabi", "-o", out]
72
+ if not (os.name == "nt" and is_clang(cc)):
73
+ # Clang does not support -fPIC on Windows
74
+ cc_cmd += ["-fPIC"]
67
75
  cc_cmd += [f'-l{lib}' for lib in libraries]
68
76
  cc_cmd += [f"-L{dir}" for dir in library_dirs]
69
77
  cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None]
@@ -86,17 +94,17 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
86
94
  scheme = 'posix_prefix'
87
95
  py_include_dir = sysconfig.get_paths(scheme=scheme)["include"]
88
96
  custom_backend_dirs = set(os.getenv(var) for var in ('TRITON_CUDACRT_PATH', 'TRITON_CUDART_PATH'))
97
+ # Don't append in place
89
98
  include_dirs = include_dirs + [srcdir, py_include_dir, *custom_backend_dirs]
90
99
  if os.name == "nt":
91
- library_dirs += find_python()
100
+ library_dirs = library_dirs + find_python()
92
101
  # Link against Python stable ABI
93
- # libraries is modified in place
94
102
  if "python3" not in libraries:
95
- libraries += ["python3"]
103
+ libraries = libraries + ["python3"]
96
104
  if is_msvc(cc):
97
105
  _, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
98
- include_dirs += msvc_winsdk_inc_dirs
99
- library_dirs += msvc_winsdk_lib_dirs
106
+ include_dirs = include_dirs + msvc_winsdk_inc_dirs
107
+ library_dirs = library_dirs + msvc_winsdk_lib_dirs
100
108
  cc_cmd = _cc_cmd(cc, src, so, include_dirs, library_dirs, libraries)
101
109
 
102
110
  try: