triton-windows 3.3.0.post19__cp313-cp313-win_amd64.whl → 3.3.1.post21__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows has been flagged as potentially problematic; further details were linked from the original page (the link target is not preserved in this text).
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +1 -1
- triton/backends/amd/driver.py +6 -1
- triton/backends/nvidia/compiler.py +1 -3
- triton/backends/nvidia/driver.py +7 -3
- triton/runtime/autotuner.py +2 -2
- triton/runtime/build.py +5 -5
- triton/runtime/tcc/lib/python310.def +1610 -0
- triton/runtime/tcc/lib/python311.def +1633 -0
- triton/runtime/tcc/lib/python312.def +1703 -0
- triton/runtime/tcc/lib/python313.def +1651 -0
- triton/runtime/tcc/lib/python313t.def +1656 -0
- triton/runtime/tcc/lib/python39.def +1644 -0
- triton/runtime/tcc/lib/python3t.def +905 -0
- triton/windows_utils.py +11 -4
- {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/METADATA +1 -1
- {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/RECORD +19 -109
- {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/WHEEL +1 -1
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/top_level.txt +0 -0
triton/_C/libtriton.pyd
CHANGED
|
Binary file
|
triton/__init__.py
CHANGED
triton/backends/amd/driver.py
CHANGED
|
@@ -383,11 +383,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
|
|
|
383
383
|
if (!PyLong_Check(ret)) {{
|
|
384
384
|
PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
|
|
385
385
|
ptr_info.valid = false;
|
|
386
|
+
Py_DECREF(ret);
|
|
386
387
|
return ptr_info;
|
|
387
388
|
}}
|
|
388
389
|
ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
|
|
389
|
-
if(!ptr_info.dev_ptr)
|
|
390
|
+
if(!ptr_info.dev_ptr) {{
|
|
391
|
+
Py_DECREF(ret);
|
|
390
392
|
return ptr_info;
|
|
393
|
+
}}
|
|
391
394
|
uint64_t dev_ptr;
|
|
392
395
|
hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
|
|
393
396
|
if (status == hipErrorInvalidValue) {{
|
|
@@ -433,6 +436,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
433
436
|
Py_DECREF(args);
|
|
434
437
|
if (!ret)
|
|
435
438
|
return NULL;
|
|
439
|
+
Py_DECREF(ret);
|
|
436
440
|
}}
|
|
437
441
|
|
|
438
442
|
|
|
@@ -446,6 +450,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
446
450
|
Py_DECREF(args);
|
|
447
451
|
if (!ret)
|
|
448
452
|
return NULL;
|
|
453
|
+
Py_DECREF(ret);
|
|
449
454
|
}}
|
|
450
455
|
|
|
451
456
|
if(PyErr_Occurred()) {{
|
|
triton/backends/nvidia/compiler.py
CHANGED
|
@@ -152,7 +152,7 @@ class CUDAOptions:
|
|
|
152
152
|
ptx_version: int = None
|
|
153
153
|
enable_fp_fusion: bool = True
|
|
154
154
|
launch_cooperative_grid: bool = False
|
|
155
|
-
supported_fp8_dtypes: Tuple[str] = ("fp8e5", "fp8e4b15")
|
|
155
|
+
supported_fp8_dtypes: Tuple[str] = ("fp8e4nv", "fp8e5", "fp8e4b15")
|
|
156
156
|
deprecated_fp8_dtypes: Tuple[str] = ()
|
|
157
157
|
default_dot_input_precision: str = "tf32"
|
|
158
158
|
allowed_dot_input_precisions: Tuple[str] = ("tf32", "tf32x3", "ieee")
|
|
@@ -203,8 +203,6 @@ class CUDABackend(BaseBackend):
|
|
|
203
203
|
|
|
204
204
|
if "supported_fp8_dtypes" not in args:
|
|
205
205
|
supported_fp8_dtypes = set(CUDAOptions.supported_fp8_dtypes)
|
|
206
|
-
if capability >= 89:
|
|
207
|
-
supported_fp8_dtypes.add("fp8e4nv")
|
|
208
206
|
args["supported_fp8_dtypes"] = tuple(sorted(supported_fp8_dtypes))
|
|
209
207
|
|
|
210
208
|
if "deprecated_fp8_dtypes" not in args:
|
triton/backends/nvidia/driver.py
CHANGED
|
@@ -372,11 +372,14 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
|
|
|
372
372
|
if (!PyLong_Check(ret)) {{
|
|
373
373
|
PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
|
|
374
374
|
ptr_info.valid = false;
|
|
375
|
+
Py_DECREF(ret);
|
|
375
376
|
return ptr_info;
|
|
376
377
|
}}
|
|
377
378
|
ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret);
|
|
378
|
-
if(!ptr_info.dev_ptr)
|
|
379
|
+
if(!ptr_info.dev_ptr) {{
|
|
380
|
+
Py_DECREF(ret);
|
|
379
381
|
return ptr_info;
|
|
382
|
+
}}
|
|
380
383
|
uint64_t dev_ptr;
|
|
381
384
|
int status = cuPointerGetAttribute(&dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
|
|
382
385
|
if (status == CUDA_ERROR_INVALID_VALUE) {{
|
|
@@ -388,7 +391,7 @@ static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
|
|
|
388
391
|
ptr_info.valid = false;
|
|
389
392
|
}}
|
|
390
393
|
ptr_info.dev_ptr = dev_ptr;
|
|
391
|
-
Py_DECREF(ret);
|
|
394
|
+
Py_DECREF(ret);
|
|
392
395
|
return ptr_info;
|
|
393
396
|
}}
|
|
394
397
|
PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
|
|
@@ -488,6 +491,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
488
491
|
Py_DECREF(args);
|
|
489
492
|
if (!ret)
|
|
490
493
|
return NULL;
|
|
494
|
+
Py_DECREF(ret);
|
|
491
495
|
}}
|
|
492
496
|
|
|
493
497
|
CUdeviceptr global_scratch = 0;
|
|
@@ -515,7 +519,7 @@ static PyObject* launch(PyObject* self, PyObject* args) {{
|
|
|
515
519
|
Py_DECREF(args);
|
|
516
520
|
if (!ret)
|
|
517
521
|
return NULL;
|
|
518
|
-
|
|
522
|
+
Py_DECREF(ret);
|
|
519
523
|
}}
|
|
520
524
|
|
|
521
525
|
Py_RETURN_NONE;
|
triton/runtime/autotuner.py
CHANGED
|
@@ -188,9 +188,9 @@ class Autotuner(KernelInterface):
|
|
|
188
188
|
# prune configs
|
|
189
189
|
used_cached_result = False
|
|
190
190
|
pruned_configs = self.prune_configs(kwargs)
|
|
191
|
-
bench_start = time.time()
|
|
191
|
+
bench_start = time.perf_counter()
|
|
192
192
|
timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
|
|
193
|
-
bench_end = time.time()
|
|
193
|
+
bench_end = time.perf_counter()
|
|
194
194
|
self.bench_time = bench_end - bench_start
|
|
195
195
|
self.cache[key] = builtins.min(timings, key=timings.get)
|
|
196
196
|
full_nargs = {**self.nargs, **kwargs, **self.cache[key].all_kwargs()}
|
triton/runtime/build.py
CHANGED
|
@@ -80,17 +80,17 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
|
|
|
80
80
|
scheme = 'posix_prefix'
|
|
81
81
|
py_include_dir = sysconfig.get_paths(scheme=scheme)["include"]
|
|
82
82
|
custom_backend_dirs = set(os.getenv(var) for var in ('TRITON_CUDACRT_PATH', 'TRITON_CUDART_PATH'))
|
|
83
|
+
# Don't append in place
|
|
83
84
|
include_dirs = include_dirs + [srcdir, py_include_dir, *custom_backend_dirs]
|
|
84
85
|
if os.name == "nt":
|
|
85
|
-
library_dirs += find_python()
|
|
86
|
+
library_dirs = library_dirs + find_python()
|
|
86
87
|
# Link against Python stable ABI
|
|
87
|
-
# libraries is modified in place
|
|
88
88
|
if "python3" not in libraries:
|
|
89
|
-
libraries += ["python3"]
|
|
89
|
+
libraries = libraries + ["python3"]
|
|
90
90
|
if is_msvc(cc):
|
|
91
91
|
_, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
|
|
92
|
-
include_dirs += msvc_winsdk_inc_dirs
|
|
93
|
-
library_dirs += msvc_winsdk_lib_dirs
|
|
92
|
+
include_dirs = include_dirs + msvc_winsdk_inc_dirs
|
|
93
|
+
library_dirs = library_dirs + msvc_winsdk_lib_dirs
|
|
94
94
|
cc_cmd = _cc_cmd(cc, src, so, include_dirs, library_dirs, libraries)
|
|
95
95
|
|
|
96
96
|
try:
|