triton-windows 3.2.0.post19__cp312-cp312-win_amd64.whl → 3.2.0.post21__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/backends/amd/driver.py +6 -1
- triton/backends/nvidia/compiler.py +1 -3
- triton/backends/nvidia/driver.py +7 -3
- triton/runtime/autotuner.py +2 -2
- triton/runtime/build.py +5 -5
- triton/runtime/tcc/lib/python310.def +1610 -0
- triton/runtime/tcc/lib/python311.def +1633 -0
- triton/runtime/tcc/lib/python312.def +1703 -0
- triton/runtime/tcc/lib/python313.def +1651 -0
- triton/runtime/tcc/lib/python313t.def +1656 -0
- triton/runtime/tcc/lib/python39.def +1644 -0
- triton/runtime/tcc/lib/python3t.def +905 -0
- triton/windows_utils.py +11 -4
- {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/METADATA +1 -1
- {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/RECORD +18 -103
- {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/WHEEL +1 -1
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1031
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1612
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1337
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -829
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -494
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1350
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10169
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -77
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -180
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -159
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -8919
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -436
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5729
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -566
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3090
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4435
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1467
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3027
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- {triton_windows-3.2.0.post19.dist-info → triton_windows-3.2.0.post21.dist-info}/top_level.txt +0 -0
triton/_C/libtriton.pyd
CHANGED
|
Binary file
|
triton/backends/amd/driver.py
CHANGED
|
@@ -362,11 +362,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
|
|
|
362
362
|
if (!PyLong_Check(ret)) {{
|
|
363
363
|
PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
|
|
364
364
|
ptr_info.valid = false;
|
|
365
|
+
Py_DECREF(ret);
|
|
365
366
|
return ptr_info;
|
|
366
367
|
}}
|
|
367
368
|
ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
|
|
368
|
-
if(!ptr_info.dev_ptr)
|
|
369
|
+
if(!ptr_info.dev_ptr) {{
|
|
370
|
+
Py_DECREF(ret);
|
|
369
371
|
return ptr_info;
|
|
372
|
+
}}
|
|
370
373
|
uint64_t dev_ptr;
|
|
371
374
|
hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
|
|
372
375
|
if (status == hipErrorInvalidValue) {{
|
|
@@ -410,6 +413,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
410
413
|
Py_DECREF(args);
|
|
411
414
|
if (!ret)
|
|
412
415
|
return NULL;
|
|
416
|
+
Py_DECREF(ret);
|
|
413
417
|
}}
|
|
414
418
|
|
|
415
419
|
|
|
@@ -423,6 +427,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
423
427
|
Py_DECREF(args);
|
|
424
428
|
if (!ret)
|
|
425
429
|
return NULL;
|
|
430
|
+
Py_DECREF(ret);
|
|
426
431
|
}}
|
|
427
432
|
|
|
428
433
|
if(PyErr_Occurred()) {{
|
|
triton/backends/nvidia/compiler.py
CHANGED
|
@@ -122,7 +122,7 @@ class CUDAOptions:
|
|
|
122
122
|
cluster_dims: tuple = (1, 1, 1)
|
|
123
123
|
ptx_version: int = None
|
|
124
124
|
enable_fp_fusion: bool = True
|
|
125
|
-
supported_fp8_dtypes: Tuple[str] = ("fp8e5", "fp8e4b15")
|
|
125
|
+
supported_fp8_dtypes: Tuple[str] = ("fp8e4nv", "fp8e5", "fp8e4b15")
|
|
126
126
|
deprecated_fp8_dtypes: Tuple[str] = ()
|
|
127
127
|
default_dot_input_precision: str = "tf32"
|
|
128
128
|
allowed_dot_input_precisions: Tuple[str] = ("tf32", "tf32x3", "ieee")
|
|
@@ -164,8 +164,6 @@ class CUDABackend(BaseBackend):
|
|
|
164
164
|
args = {k: opts[k] for k in CUDAOptions.__dataclass_fields__.keys() if k in opts}
|
|
165
165
|
if "supported_fp8_dtypes" not in args:
|
|
166
166
|
supported_fp8_dtypes = set(CUDAOptions.supported_fp8_dtypes)
|
|
167
|
-
if self.capability >= 89:
|
|
168
|
-
supported_fp8_dtypes.add("fp8e4nv")
|
|
169
167
|
args["supported_fp8_dtypes"] = tuple(sorted(supported_fp8_dtypes))
|
|
170
168
|
|
|
171
169
|
if "deprecated_fp8_dtypes" not in args:
|
triton/backends/nvidia/driver.py
CHANGED
|
@@ -314,11 +314,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
|
|
|
314
314
|
if (!PyLong_Check(ret)) {{
|
|
315
315
|
PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
|
|
316
316
|
ptr_info.valid = false;
|
|
317
|
+
Py_DECREF(ret);
|
|
317
318
|
return ptr_info;
|
|
318
319
|
}}
|
|
319
320
|
ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret);
|
|
320
|
-
if(!ptr_info.dev_ptr)
|
|
321
|
+
if(!ptr_info.dev_ptr) {{
|
|
322
|
+
Py_DECREF(ret);
|
|
321
323
|
return ptr_info;
|
|
324
|
+
}}
|
|
322
325
|
uint64_t dev_ptr;
|
|
323
326
|
int status = cuPointerGetAttribute(&dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
|
|
324
327
|
if (status == CUDA_ERROR_INVALID_VALUE) {{
|
|
@@ -330,7 +333,7 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
|
|
|
330
333
|
ptr_info.valid = false;
|
|
331
334
|
}}
|
|
332
335
|
ptr_info.dev_ptr = dev_ptr;
|
|
333
|
-
Py_DECREF(ret);
|
|
336
|
+
Py_DECREF(ret);
|
|
334
337
|
return ptr_info;
|
|
335
338
|
}}
|
|
336
339
|
PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
|
|
@@ -427,6 +430,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
427
430
|
Py_DECREF(args);
|
|
428
431
|
if (!ret)
|
|
429
432
|
return NULL;
|
|
433
|
+
Py_DECREF(ret);
|
|
430
434
|
}}
|
|
431
435
|
|
|
432
436
|
// raise exception asap
|
|
@@ -445,7 +449,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
445
449
|
Py_DECREF(args);
|
|
446
450
|
if (!ret)
|
|
447
451
|
return NULL;
|
|
448
|
-
|
|
452
|
+
Py_DECREF(ret);
|
|
449
453
|
}}
|
|
450
454
|
|
|
451
455
|
// return None
|
triton/runtime/autotuner.py
CHANGED
|
@@ -182,9 +182,9 @@ class Autotuner(KernelInterface):
|
|
|
182
182
|
# prune configs
|
|
183
183
|
used_cached_result = False
|
|
184
184
|
pruned_configs = self.prune_configs(kwargs)
|
|
185
|
-
bench_start = time.time()
|
|
185
|
+
bench_start = time.perf_counter()
|
|
186
186
|
timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
|
|
187
|
-
bench_end = time.time()
|
|
187
|
+
bench_end = time.perf_counter()
|
|
188
188
|
self.bench_time = bench_end - bench_start
|
|
189
189
|
self.cache[key] = builtins.min(timings, key=timings.get)
|
|
190
190
|
full_nargs = {**self.nargs, **kwargs, **self.cache[key].all_kwargs()}
|
triton/runtime/build.py
CHANGED
|
@@ -94,17 +94,17 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
|
|
|
94
94
|
scheme = 'posix_prefix'
|
|
95
95
|
py_include_dir = sysconfig.get_paths(scheme=scheme)["include"]
|
|
96
96
|
custom_backend_dirs = set(os.getenv(var) for var in ('TRITON_CUDACRT_PATH', 'TRITON_CUDART_PATH'))
|
|
97
|
+
# Don't append in place
|
|
97
98
|
include_dirs = include_dirs + [srcdir, py_include_dir, *custom_backend_dirs]
|
|
98
99
|
if os.name == "nt":
|
|
99
|
-
library_dirs
|
|
100
|
+
library_dirs = library_dirs + find_python()
|
|
100
101
|
# Link against Python stable ABI
|
|
101
|
-
# libraries is modified in place
|
|
102
102
|
if "python3" not in libraries:
|
|
103
|
-
libraries
|
|
103
|
+
libraries = libraries + ["python3"]
|
|
104
104
|
if is_msvc(cc):
|
|
105
105
|
_, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
|
|
106
|
-
include_dirs
|
|
107
|
-
library_dirs
|
|
106
|
+
include_dirs = include_dirs + msvc_winsdk_inc_dirs
|
|
107
|
+
library_dirs = library_dirs + msvc_winsdk_lib_dirs
|
|
108
108
|
cc_cmd = _cc_cmd(cc, src, so, include_dirs, library_dirs, libraries)
|
|
109
109
|
|
|
110
110
|
try:
|