triton-windows 3.2.0.post18__cp310-cp310-win_amd64.whl → 3.2.0.post21__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/backends/amd/driver.py +6 -1
- triton/backends/nvidia/compiler.py +1 -3
- triton/backends/nvidia/driver.c +1 -0
- triton/backends/nvidia/driver.py +8 -3
- triton/runtime/autotuner.py +2 -2
- triton/runtime/build.py +14 -6
- triton/runtime/tcc/lib/python310.def +1610 -0
- triton/runtime/tcc/lib/python311.def +1633 -0
- triton/runtime/tcc/lib/python312.def +1703 -0
- triton/runtime/tcc/lib/python313.def +1651 -0
- triton/runtime/tcc/lib/python313t.def +1656 -0
- triton/runtime/tcc/lib/python39.def +1644 -0
- triton/runtime/tcc/lib/python3t.def +905 -0
- triton/windows_utils.py +11 -4
- {triton_windows-3.2.0.post18.dist-info → triton_windows-3.2.0.post21.dist-info}/METADATA +1 -1
- {triton_windows-3.2.0.post18.dist-info → triton_windows-3.2.0.post21.dist-info}/RECORD +19 -104
- {triton_windows-3.2.0.post18.dist-info → triton_windows-3.2.0.post21.dist-info}/WHEEL +1 -1
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1031
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1612
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1337
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -829
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -494
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1350
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10169
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -77
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -180
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -159
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -8919
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -436
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5729
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -566
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3090
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4435
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1467
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3027
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- {triton_windows-3.2.0.post18.dist-info → triton_windows-3.2.0.post21.dist-info}/top_level.txt +0 -0
triton/_C/libtriton.pyd
CHANGED
|
Binary file
|
triton/backends/amd/driver.py
CHANGED
|
@@ -362,11 +362,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
|
|
|
362
362
|
if (!PyLong_Check(ret)) {{
|
|
363
363
|
PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
|
|
364
364
|
ptr_info.valid = false;
|
|
365
|
+
Py_DECREF(ret);
|
|
365
366
|
return ptr_info;
|
|
366
367
|
}}
|
|
367
368
|
ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
|
|
368
|
-
if(!ptr_info.dev_ptr)
|
|
369
|
+
if(!ptr_info.dev_ptr) {{
|
|
370
|
+
Py_DECREF(ret);
|
|
369
371
|
return ptr_info;
|
|
372
|
+
}}
|
|
370
373
|
uint64_t dev_ptr;
|
|
371
374
|
hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
|
|
372
375
|
if (status == hipErrorInvalidValue) {{
|
|
@@ -410,6 +413,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
410
413
|
Py_DECREF(args);
|
|
411
414
|
if (!ret)
|
|
412
415
|
return NULL;
|
|
416
|
+
Py_DECREF(ret);
|
|
413
417
|
}}
|
|
414
418
|
|
|
415
419
|
|
|
@@ -423,6 +427,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
423
427
|
Py_DECREF(args);
|
|
424
428
|
if (!ret)
|
|
425
429
|
return NULL;
|
|
430
|
+
Py_DECREF(ret);
|
|
426
431
|
}}
|
|
427
432
|
|
|
428
433
|
if(PyErr_Occurred()) {{
|
triton/backends/nvidia/compiler.py
CHANGED
|
@@ -122,7 +122,7 @@ class CUDAOptions:
|
|
|
122
122
|
cluster_dims: tuple = (1, 1, 1)
|
|
123
123
|
ptx_version: int = None
|
|
124
124
|
enable_fp_fusion: bool = True
|
|
125
|
-
supported_fp8_dtypes: Tuple[str] = ("fp8e5", "fp8e4b15")
|
|
125
|
+
supported_fp8_dtypes: Tuple[str] = ("fp8e4nv", "fp8e5", "fp8e4b15")
|
|
126
126
|
deprecated_fp8_dtypes: Tuple[str] = ()
|
|
127
127
|
default_dot_input_precision: str = "tf32"
|
|
128
128
|
allowed_dot_input_precisions: Tuple[str] = ("tf32", "tf32x3", "ieee")
|
|
@@ -164,8 +164,6 @@ class CUDABackend(BaseBackend):
|
|
|
164
164
|
args = {k: opts[k] for k in CUDAOptions.__dataclass_fields__.keys() if k in opts}
|
|
165
165
|
if "supported_fp8_dtypes" not in args:
|
|
166
166
|
supported_fp8_dtypes = set(CUDAOptions.supported_fp8_dtypes)
|
|
167
|
-
if self.capability >= 89:
|
|
168
|
-
supported_fp8_dtypes.add("fp8e4nv")
|
|
169
167
|
args["supported_fp8_dtypes"] = tuple(sorted(supported_fp8_dtypes))
|
|
170
168
|
|
|
171
169
|
if "deprecated_fp8_dtypes" not in args:
|
triton/backends/nvidia/driver.c
CHANGED
triton/backends/nvidia/driver.py
CHANGED
|
@@ -181,6 +181,7 @@ def make_launcher(constants, signature, ids):
|
|
|
181
181
|
else:
|
|
182
182
|
params_decl = "void **params = NULL;"
|
|
183
183
|
src = f"""
|
|
184
|
+
#define _CRT_SECURE_NO_WARNINGS
|
|
184
185
|
#include \"cuda.h\"
|
|
185
186
|
#include <stdbool.h>
|
|
186
187
|
#define PY_SSIZE_T_CLEAN
|
|
@@ -313,11 +314,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
|
|
|
313
314
|
if (!PyLong_Check(ret)) {{
|
|
314
315
|
PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
|
|
315
316
|
ptr_info.valid = false;
|
|
317
|
+
Py_DECREF(ret);
|
|
316
318
|
return ptr_info;
|
|
317
319
|
}}
|
|
318
320
|
ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret);
|
|
319
|
-
if(!ptr_info.dev_ptr)
|
|
321
|
+
if(!ptr_info.dev_ptr) {{
|
|
322
|
+
Py_DECREF(ret);
|
|
320
323
|
return ptr_info;
|
|
324
|
+
}}
|
|
321
325
|
uint64_t dev_ptr;
|
|
322
326
|
int status = cuPointerGetAttribute(&dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
|
|
323
327
|
if (status == CUDA_ERROR_INVALID_VALUE) {{
|
|
@@ -329,7 +333,7 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
|
|
|
329
333
|
ptr_info.valid = false;
|
|
330
334
|
}}
|
|
331
335
|
ptr_info.dev_ptr = dev_ptr;
|
|
332
|
-
Py_DECREF(ret);
|
|
336
|
+
Py_DECREF(ret);
|
|
333
337
|
return ptr_info;
|
|
334
338
|
}}
|
|
335
339
|
PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
|
|
@@ -426,6 +430,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
426
430
|
Py_DECREF(args);
|
|
427
431
|
if (!ret)
|
|
428
432
|
return NULL;
|
|
433
|
+
Py_DECREF(ret);
|
|
429
434
|
}}
|
|
430
435
|
|
|
431
436
|
// raise exception asap
|
|
@@ -444,7 +449,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
444
449
|
Py_DECREF(args);
|
|
445
450
|
if (!ret)
|
|
446
451
|
return NULL;
|
|
447
|
-
|
|
452
|
+
Py_DECREF(ret);
|
|
448
453
|
}}
|
|
449
454
|
|
|
450
455
|
// return None
|
triton/runtime/autotuner.py
CHANGED
|
@@ -182,9 +182,9 @@ class Autotuner(KernelInterface):
|
|
|
182
182
|
# prune configs
|
|
183
183
|
used_cached_result = False
|
|
184
184
|
pruned_configs = self.prune_configs(kwargs)
|
|
185
|
-
bench_start = time.time()
|
|
185
|
+
bench_start = time.perf_counter()
|
|
186
186
|
timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
|
|
187
|
-
bench_end = time.time()
|
|
187
|
+
bench_end = time.perf_counter()
|
|
188
188
|
self.bench_time = bench_end - bench_start
|
|
189
189
|
self.cache[key] = builtins.min(timings, key=timings.get)
|
|
190
190
|
full_nargs = {**self.nargs, **kwargs, **self.cache[key].all_kwargs()}
|
triton/runtime/build.py
CHANGED
|
@@ -49,6 +49,11 @@ def is_msvc(cc):
|
|
|
49
49
|
return cc == "cl" or cc == "cl.exe"
|
|
50
50
|
|
|
51
51
|
|
|
52
|
+
def is_clang(cc):
|
|
53
|
+
cc = os.path.basename(cc).lower()
|
|
54
|
+
return cc == "clang" or cc == "clang.exe"
|
|
55
|
+
|
|
56
|
+
|
|
52
57
|
def _cc_cmd(cc, src, out, include_dirs, library_dirs, libraries):
|
|
53
58
|
if is_msvc(cc):
|
|
54
59
|
out_base = os.path.splitext(out)[0]
|
|
@@ -63,7 +68,10 @@ def _cc_cmd(cc, src, out, include_dirs, library_dirs, libraries):
|
|
|
63
68
|
cc_cmd += [f"/PDB:{out_base + '.pdb'}"]
|
|
64
69
|
else:
|
|
65
70
|
# for -Wno-psabi, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111047
|
|
66
|
-
cc_cmd = [cc, src, "-O3", "-shared", "-fPIC", "-Wno-psabi", "-o", out]
|
|
71
|
+
cc_cmd = [cc, src, "-O3", "-shared", "-Wno-psabi", "-o", out]
|
|
72
|
+
if not (os.name == "nt" and is_clang(cc)):
|
|
73
|
+
# Clang does not support -fPIC on Windows
|
|
74
|
+
cc_cmd += ["-fPIC"]
|
|
67
75
|
cc_cmd += [f'-l{lib}' for lib in libraries]
|
|
68
76
|
cc_cmd += [f"-L{dir}" for dir in library_dirs]
|
|
69
77
|
cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None]
|
|
@@ -86,17 +94,17 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
|
|
|
86
94
|
scheme = 'posix_prefix'
|
|
87
95
|
py_include_dir = sysconfig.get_paths(scheme=scheme)["include"]
|
|
88
96
|
custom_backend_dirs = set(os.getenv(var) for var in ('TRITON_CUDACRT_PATH', 'TRITON_CUDART_PATH'))
|
|
97
|
+
# Don't append in place
|
|
89
98
|
include_dirs = include_dirs + [srcdir, py_include_dir, *custom_backend_dirs]
|
|
90
99
|
if os.name == "nt":
|
|
91
|
-
library_dirs += find_python()
|
|
100
|
+
library_dirs = library_dirs + find_python()
|
|
92
101
|
# Link against Python stable ABI
|
|
93
|
-
# libraries is modified in place
|
|
94
102
|
if "python3" not in libraries:
|
|
95
|
-
libraries += ["python3"]
|
|
103
|
+
libraries = libraries + ["python3"]
|
|
96
104
|
if is_msvc(cc):
|
|
97
105
|
_, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
|
|
98
|
-
include_dirs += msvc_winsdk_inc_dirs
|
|
99
|
-
library_dirs += msvc_winsdk_lib_dirs
|
|
106
|
+
include_dirs = include_dirs + msvc_winsdk_inc_dirs
|
|
107
|
+
library_dirs = library_dirs + msvc_winsdk_lib_dirs
|
|
100
108
|
cc_cmd = _cc_cmd(cc, src, so, include_dirs, library_dirs, libraries)
|
|
101
109
|
|
|
102
110
|
try:
|