triton-windows 3.3.0.post19__cp312-cp312-win_amd64.whl → 3.3.1.post21__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (116)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +1 -1
  3. triton/backends/amd/driver.py +6 -1
  4. triton/backends/nvidia/compiler.py +1 -3
  5. triton/backends/nvidia/driver.py +7 -3
  6. triton/runtime/autotuner.py +2 -2
  7. triton/runtime/build.py +5 -5
  8. triton/runtime/tcc/lib/python310.def +1610 -0
  9. triton/runtime/tcc/lib/python311.def +1633 -0
  10. triton/runtime/tcc/lib/python312.def +1703 -0
  11. triton/runtime/tcc/lib/python313.def +1651 -0
  12. triton/runtime/tcc/lib/python313t.def +1656 -0
  13. triton/runtime/tcc/lib/python39.def +1644 -0
  14. triton/runtime/tcc/lib/python3t.def +905 -0
  15. triton/windows_utils.py +11 -4
  16. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/METADATA +1 -1
  17. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/RECORD +19 -109
  18. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/WHEEL +1 -1
  19. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  20. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
  21. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
  22. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
  23. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  24. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  25. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  26. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
  27. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  28. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
  29. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  30. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  31. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  32. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  33. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  34. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  35. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  36. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  37. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
  38. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
  39. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  40. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  41. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  42. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  43. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  44. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  45. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  46. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
  47. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  48. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  49. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  50. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  51. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  52. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
  53. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
  54. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
  55. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  56. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  57. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  58. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  59. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  60. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  61. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  62. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  63. triton/backends/amd/include/hip/device_functions.h +0 -38
  64. triton/backends/amd/include/hip/driver_types.h +0 -468
  65. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  66. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  67. triton/backends/amd/include/hip/hip_common.h +0 -100
  68. triton/backends/amd/include/hip/hip_complex.h +0 -38
  69. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  70. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  71. triton/backends/amd/include/hip/hip_ext.h +0 -161
  72. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  73. triton/backends/amd/include/hip/hip_fp8.h +0 -33
  74. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  75. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  76. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  77. triton/backends/amd/include/hip/hip_profile.h +0 -27
  78. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  79. triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
  80. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  81. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  82. triton/backends/amd/include/hip/hip_version.h +0 -17
  83. triton/backends/amd/include/hip/hiprtc.h +0 -421
  84. triton/backends/amd/include/hip/library_types.h +0 -78
  85. triton/backends/amd/include/hip/math_functions.h +0 -42
  86. triton/backends/amd/include/hip/surface_types.h +0 -63
  87. triton/backends/amd/include/hip/texture_types.h +0 -194
  88. triton/backends/amd/include/hsa/Brig.h +0 -1131
  89. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  90. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
  91. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  92. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  93. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  94. triton/backends/amd/include/hsa/hsa.h +0 -5738
  95. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  96. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
  97. triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
  98. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
  99. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  100. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  101. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  102. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  103. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
  104. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  105. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
  106. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
  107. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
  108. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  109. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  110. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  111. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  112. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  113. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  114. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  115. triton/backends/amd/include/roctracer/roctx.h +0 -229
  116. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/top_level.txt +0 -0
triton/_C/libtriton.pyd CHANGED
Binary file
triton/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """isort:skip_file"""
2
- __version__ = '3.3.0'
2
+ __version__ = '3.3.1'
3
3
 
4
4
  # ---------------------------------------
5
5
  # Note: import order is significant here.
triton/backends/amd/driver.py CHANGED
@@ -383,11 +383,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
383
383
  if (!PyLong_Check(ret)) {{
384
384
  PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
385
385
  ptr_info.valid = false;
386
+ Py_DECREF(ret);
386
387
  return ptr_info;
387
388
  }}
388
389
  ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
389
- if(!ptr_info.dev_ptr)
390
+ if(!ptr_info.dev_ptr) {{
391
+ Py_DECREF(ret);
390
392
  return ptr_info;
393
+ }}
391
394
  uint64_t dev_ptr;
392
395
  hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
393
396
  if (status == hipErrorInvalidValue) {{
@@ -433,6 +436,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
433
436
  Py_DECREF(args);
434
437
  if (!ret)
435
438
  return NULL;
439
+ Py_DECREF(ret);
436
440
  }}
437
441
 
438
442
 
@@ -446,6 +450,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
446
450
  Py_DECREF(args);
447
451
  if (!ret)
448
452
  return NULL;
453
+ Py_DECREF(ret);
449
454
  }}
450
455
 
451
456
  if(PyErr_Occurred()) {{
triton/backends/nvidia/compiler.py CHANGED
@@ -152,7 +152,7 @@ class CUDAOptions:
152
152
  ptx_version: int = None
153
153
  enable_fp_fusion: bool = True
154
154
  launch_cooperative_grid: bool = False
155
- supported_fp8_dtypes: Tuple[str] = ("fp8e5", "fp8e4b15")
155
+ supported_fp8_dtypes: Tuple[str] = ("fp8e4nv", "fp8e5", "fp8e4b15")
156
156
  deprecated_fp8_dtypes: Tuple[str] = ()
157
157
  default_dot_input_precision: str = "tf32"
158
158
  allowed_dot_input_precisions: Tuple[str] = ("tf32", "tf32x3", "ieee")
@@ -203,8 +203,6 @@ class CUDABackend(BaseBackend):
203
203
 
204
204
  if "supported_fp8_dtypes" not in args:
205
205
  supported_fp8_dtypes = set(CUDAOptions.supported_fp8_dtypes)
206
- if capability >= 89:
207
- supported_fp8_dtypes.add("fp8e4nv")
208
206
  args["supported_fp8_dtypes"] = tuple(sorted(supported_fp8_dtypes))
209
207
 
210
208
  if "deprecated_fp8_dtypes" not in args:
triton/backends/nvidia/driver.py CHANGED
@@ -372,11 +372,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
372
372
  if (!PyLong_Check(ret)) {{
373
373
  PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
374
374
  ptr_info.valid = false;
375
+ Py_DECREF(ret);
375
376
  return ptr_info;
376
377
  }}
377
378
  ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret);
378
- if(!ptr_info.dev_ptr)
379
+ if(!ptr_info.dev_ptr) {{
380
+ Py_DECREF(ret);
379
381
  return ptr_info;
382
+ }}
380
383
  uint64_t dev_ptr;
381
384
  int status = cuPointerGetAttribute(&dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
382
385
  if (status == CUDA_ERROR_INVALID_VALUE) {{
@@ -388,7 +391,7 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
388
391
  ptr_info.valid = false;
389
392
  }}
390
393
  ptr_info.dev_ptr = dev_ptr;
391
- Py_DECREF(ret); // Thanks ChatGPT!
394
+ Py_DECREF(ret);
392
395
  return ptr_info;
393
396
  }}
394
397
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
@@ -488,6 +491,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
488
491
  Py_DECREF(args);
489
492
  if (!ret)
490
493
  return NULL;
494
+ Py_DECREF(ret);
491
495
  }}
492
496
 
493
497
  CUdeviceptr global_scratch = 0;
@@ -515,7 +519,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
515
519
  Py_DECREF(args);
516
520
  if (!ret)
517
521
  return NULL;
518
-
522
+ Py_DECREF(ret);
519
523
  }}
520
524
 
521
525
  Py_RETURN_NONE;
triton/runtime/autotuner.py CHANGED
@@ -188,9 +188,9 @@ class Autotuner(KernelInterface):
188
188
  # prune configs
189
189
  used_cached_result = False
190
190
  pruned_configs = self.prune_configs(kwargs)
191
- bench_start = time.time()
191
+ bench_start = time.perf_counter()
192
192
  timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
193
- bench_end = time.time()
193
+ bench_end = time.perf_counter()
194
194
  self.bench_time = bench_end - bench_start
195
195
  self.cache[key] = builtins.min(timings, key=timings.get)
196
196
  full_nargs = {**self.nargs, **kwargs, **self.cache[key].all_kwargs()}
triton/runtime/build.py CHANGED
@@ -80,17 +80,17 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
80
80
  scheme = 'posix_prefix'
81
81
  py_include_dir = sysconfig.get_paths(scheme=scheme)["include"]
82
82
  custom_backend_dirs = set(os.getenv(var) for var in ('TRITON_CUDACRT_PATH', 'TRITON_CUDART_PATH'))
83
+ # Don't append in place
83
84
  include_dirs = include_dirs + [srcdir, py_include_dir, *custom_backend_dirs]
84
85
  if os.name == "nt":
85
- library_dirs += find_python()
86
+ library_dirs = library_dirs + find_python()
86
87
  # Link against Python stable ABI
87
- # libraries is modified in place
88
88
  if "python3" not in libraries:
89
- libraries += ["python3"]
89
+ libraries = libraries + ["python3"]
90
90
  if is_msvc(cc):
91
91
  _, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
92
- include_dirs += msvc_winsdk_inc_dirs
93
- library_dirs += msvc_winsdk_lib_dirs
92
+ include_dirs = include_dirs + msvc_winsdk_inc_dirs
93
+ library_dirs = library_dirs + msvc_winsdk_lib_dirs
94
94
  cc_cmd = _cc_cmd(cc, src, so, include_dirs, library_dirs, libraries)
95
95
 
96
96
  try: