triton-windows 3.2.0.post19__cp310-cp310-win_amd64.whl → 3.2.0.post21__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (110)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/backends/amd/driver.py +6 -1
  3. triton/backends/nvidia/compiler.py +1 -3
  4. triton/backends/nvidia/driver.py +7 -3
  5. triton/runtime/autotuner.py +2 -2
  6. triton/runtime/build.py +5 -5
  7. triton/runtime/tcc/lib/python310.def +1610 -0
  8. triton/runtime/tcc/lib/python311.def +1633 -0
  9. triton/runtime/tcc/lib/python312.def +1703 -0
  10. triton/runtime/tcc/lib/python313.def +1651 -0
  11. triton/runtime/tcc/lib/python313t.def +1656 -0
  12. triton/runtime/tcc/lib/python39.def +1644 -0
  13. triton/runtime/tcc/lib/python3t.def +905 -0
  14. triton/windows_utils.py +11 -4
  15. {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/METADATA +1 -1
  16. {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/RECORD +18 -103
  17. {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/WHEEL +1 -1
  18. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  19. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1031
  20. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1612
  21. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1337
  22. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  23. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  24. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  25. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -829
  26. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  27. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  28. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  29. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  30. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  31. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  32. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  33. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  34. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  35. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -494
  36. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  37. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  38. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  39. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  40. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  41. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  42. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  43. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1350
  44. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  45. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  46. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  47. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  48. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  49. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10169
  50. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -77
  51. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -180
  52. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  53. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  54. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  55. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  56. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  57. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  58. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  59. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  60. triton/backends/amd/include/hip/device_functions.h +0 -38
  61. triton/backends/amd/include/hip/driver_types.h +0 -468
  62. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  63. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  64. triton/backends/amd/include/hip/hip_common.h +0 -100
  65. triton/backends/amd/include/hip/hip_complex.h +0 -38
  66. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  67. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  68. triton/backends/amd/include/hip/hip_ext.h +0 -159
  69. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  70. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  71. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  72. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  73. triton/backends/amd/include/hip/hip_profile.h +0 -27
  74. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  75. triton/backends/amd/include/hip/hip_runtime_api.h +0 -8919
  76. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  77. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  78. triton/backends/amd/include/hip/hip_version.h +0 -17
  79. triton/backends/amd/include/hip/hiprtc.h +0 -421
  80. triton/backends/amd/include/hip/library_types.h +0 -78
  81. triton/backends/amd/include/hip/math_functions.h +0 -42
  82. triton/backends/amd/include/hip/surface_types.h +0 -63
  83. triton/backends/amd/include/hip/texture_types.h +0 -194
  84. triton/backends/amd/include/hsa/Brig.h +0 -1131
  85. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  86. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -436
  87. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  88. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  89. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  90. triton/backends/amd/include/hsa/hsa.h +0 -5729
  91. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  92. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -566
  93. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3090
  94. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  95. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  96. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  97. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  98. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  99. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4435
  100. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1467
  101. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3027
  102. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  103. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  104. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  105. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  106. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  107. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  108. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  109. triton/backends/amd/include/roctracer/roctx.h +0 -229
  110. {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/top_level.txt +0 -0
triton/_C/libtriton.pyd CHANGED
Binary file
triton/backends/amd/driver.py CHANGED
@@ -362,11 +362,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
362
362
  if (!PyLong_Check(ret)) {{
363
363
  PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
364
364
  ptr_info.valid = false;
365
+ Py_DECREF(ret);
365
366
  return ptr_info;
366
367
  }}
367
368
  ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
368
- if(!ptr_info.dev_ptr)
369
+ if(!ptr_info.dev_ptr) {{
370
+ Py_DECREF(ret);
369
371
  return ptr_info;
372
+ }}
370
373
  uint64_t dev_ptr;
371
374
  hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
372
375
  if (status == hipErrorInvalidValue) {{
@@ -410,6 +413,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
410
413
  Py_DECREF(args);
411
414
  if (!ret)
412
415
  return NULL;
416
+ Py_DECREF(ret);
413
417
  }}
414
418
 
415
419
 
@@ -423,6 +427,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
423
427
  Py_DECREF(args);
424
428
  if (!ret)
425
429
  return NULL;
430
+ Py_DECREF(ret);
426
431
  }}
427
432
 
428
433
  if(PyErr_Occurred()) {{
triton/backends/nvidia/compiler.py CHANGED
@@ -122,7 +122,7 @@ class CUDAOptions:
122
122
  cluster_dims: tuple = (1, 1, 1)
123
123
  ptx_version: int = None
124
124
  enable_fp_fusion: bool = True
125
- supported_fp8_dtypes: Tuple[str] = ("fp8e5", "fp8e4b15")
125
+ supported_fp8_dtypes: Tuple[str] = ("fp8e4nv", "fp8e5", "fp8e4b15")
126
126
  deprecated_fp8_dtypes: Tuple[str] = ()
127
127
  default_dot_input_precision: str = "tf32"
128
128
  allowed_dot_input_precisions: Tuple[str] = ("tf32", "tf32x3", "ieee")
@@ -164,8 +164,6 @@ class CUDABackend(BaseBackend):
164
164
  args = {k: opts[k] for k in CUDAOptions.__dataclass_fields__.keys() if k in opts}
165
165
  if "supported_fp8_dtypes" not in args:
166
166
  supported_fp8_dtypes = set(CUDAOptions.supported_fp8_dtypes)
167
- if self.capability >= 89:
168
- supported_fp8_dtypes.add("fp8e4nv")
169
167
  args["supported_fp8_dtypes"] = tuple(sorted(supported_fp8_dtypes))
170
168
 
171
169
  if "deprecated_fp8_dtypes" not in args:
triton/backends/nvidia/driver.py CHANGED
@@ -314,11 +314,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
314
314
  if (!PyLong_Check(ret)) {{
315
315
  PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
316
316
  ptr_info.valid = false;
317
+ Py_DECREF(ret);
317
318
  return ptr_info;
318
319
  }}
319
320
  ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret);
320
- if(!ptr_info.dev_ptr)
321
+ if(!ptr_info.dev_ptr) {{
322
+ Py_DECREF(ret);
321
323
  return ptr_info;
324
+ }}
322
325
  uint64_t dev_ptr;
323
326
  int status = cuPointerGetAttribute(&dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
324
327
  if (status == CUDA_ERROR_INVALID_VALUE) {{
@@ -330,7 +333,7 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
330
333
  ptr_info.valid = false;
331
334
  }}
332
335
  ptr_info.dev_ptr = dev_ptr;
333
- Py_DECREF(ret); // Thanks ChatGPT!
336
+ Py_DECREF(ret);
334
337
  return ptr_info;
335
338
  }}
336
339
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
@@ -427,6 +430,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
427
430
  Py_DECREF(args);
428
431
  if (!ret)
429
432
  return NULL;
433
+ Py_DECREF(ret);
430
434
  }}
431
435
 
432
436
  // raise exception asap
@@ -445,7 +449,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
445
449
  Py_DECREF(args);
446
450
  if (!ret)
447
451
  return NULL;
448
-
452
+ Py_DECREF(ret);
449
453
  }}
450
454
 
451
455
  // return None
triton/runtime/autotuner.py CHANGED
@@ -182,9 +182,9 @@ class Autotuner(KernelInterface):
182
182
  # prune configs
183
183
  used_cached_result = False
184
184
  pruned_configs = self.prune_configs(kwargs)
185
- bench_start = time.time()
185
+ bench_start = time.perf_counter()
186
186
  timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
187
- bench_end = time.time()
187
+ bench_end = time.perf_counter()
188
188
  self.bench_time = bench_end - bench_start
189
189
  self.cache[key] = builtins.min(timings, key=timings.get)
190
190
  full_nargs = {**self.nargs, **kwargs, **self.cache[key].all_kwargs()}
triton/runtime/build.py CHANGED
@@ -94,17 +94,17 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
94
94
  scheme = 'posix_prefix'
95
95
  py_include_dir = sysconfig.get_paths(scheme=scheme)["include"]
96
96
  custom_backend_dirs = set(os.getenv(var) for var in ('TRITON_CUDACRT_PATH', 'TRITON_CUDART_PATH'))
97
+ # Don't append in place
97
98
  include_dirs = include_dirs + [srcdir, py_include_dir, *custom_backend_dirs]
98
99
  if os.name == "nt":
99
- library_dirs += find_python()
100
+ library_dirs = library_dirs + find_python()
100
101
  # Link against Python stable ABI
101
- # libraries is modified in place
102
102
  if "python3" not in libraries:
103
- libraries += ["python3"]
103
+ libraries = libraries + ["python3"]
104
104
  if is_msvc(cc):
105
105
  _, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
106
- include_dirs += msvc_winsdk_inc_dirs
107
- library_dirs += msvc_winsdk_lib_dirs
106
+ include_dirs = include_dirs + msvc_winsdk_inc_dirs
107
+ library_dirs = library_dirs + msvc_winsdk_lib_dirs
108
108
  cc_cmd = _cc_cmd(cc, src, so, include_dirs, library_dirs, libraries)
109
109
 
110
110
  try: