triton-windows 3.3.0a0.post17__cp310-cp310-win_amd64.whl → 3.3.0.post19__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/backends/amd/compiler.py +4 -0
- triton/backends/nvidia/compiler.py +22 -1
- triton/backends/nvidia/driver.c +1 -0
- triton/backends/nvidia/driver.py +5 -0
- triton/runtime/autotuner.py +18 -2
- triton/runtime/build.py +22 -3
- triton/tools/extra/cuda/compile.c +1 -0
- triton/windows_utils.py +96 -35
- {triton_windows-3.3.0a0.post17.dist-info → triton_windows-3.3.0.post19.dist-info}/METADATA +1 -1
- {triton_windows-3.3.0a0.post17.dist-info → triton_windows-3.3.0.post19.dist-info}/RECORD +13 -13
- {triton_windows-3.3.0a0.post17.dist-info → triton_windows-3.3.0.post19.dist-info}/WHEEL +1 -1
- {triton_windows-3.3.0a0.post17.dist-info → triton_windows-3.3.0.post19.dist-info}/top_level.txt +0 -0
triton/_C/libtriton.pyd
CHANGED
|
Binary file
|
triton/backends/amd/compiler.py
CHANGED
|
@@ -28,6 +28,10 @@ class HIPOptions:
|
|
|
28
28
|
waves_per_eu: int = 1
|
|
29
29
|
num_stages: int = 2
|
|
30
30
|
num_ctas: int = 1
|
|
31
|
+
num_buffers_warp_spec: int = 0
|
|
32
|
+
num_consumer_groups: int = 0
|
|
33
|
+
reg_dec_producer: int = 0
|
|
34
|
+
reg_inc_consumer: int = 0
|
|
31
35
|
extern_libs: dict = None
|
|
32
36
|
cluster_dims: tuple = (1, 1, 1)
|
|
33
37
|
debug: bool = False
|
|
@@ -141,6 +141,10 @@ class CUDAOptions:
|
|
|
141
141
|
num_warps: int = 4
|
|
142
142
|
num_ctas: int = 1
|
|
143
143
|
num_stages: int = 3
|
|
144
|
+
num_buffers_warp_spec: int = 0
|
|
145
|
+
num_consumer_groups: int = 0
|
|
146
|
+
reg_dec_producer: int = 0
|
|
147
|
+
reg_inc_consumer: int = 0
|
|
144
148
|
# maxnreg corresponds to the ptx parameter .maxnreg, which controls the
|
|
145
149
|
# maximum number of 32-bit registers used by one thread.
|
|
146
150
|
maxnreg: Optional[int] = None
|
|
@@ -285,16 +289,29 @@ class CUDABackend(BaseBackend):
|
|
|
285
289
|
passes.ttgpuir.add_optimize_accumulator_init(pm)
|
|
286
290
|
passes.common.add_canonicalizer(pm)
|
|
287
291
|
passes.ttgpuir.add_combine_tensor_select_and_if(pm)
|
|
292
|
+
passes.ttgpuir.add_ws_task_partition(pm, opt.num_consumer_groups)
|
|
293
|
+
passes.ttgpuir.add_taskid_propagate(pm, opt.num_consumer_groups)
|
|
294
|
+
passes.ttgpuir.add_ws_data_partition(pm, opt.num_consumer_groups)
|
|
295
|
+
passes.ttgpuir.add_ws_code_partition(pm, opt.num_buffers_warp_spec, opt.num_consumer_groups,
|
|
296
|
+
opt.reg_dec_producer, opt.reg_inc_consumer)
|
|
288
297
|
passes.ttgpuir.add_pipeline(pm, opt.num_stages, dump_enabled)
|
|
298
|
+
passes.ttgpuir.add_ping_pong_sync(pm, opt.num_consumer_groups)
|
|
299
|
+
passes.ttgpuir.add_ws_lowering(pm, opt.num_consumer_groups)
|
|
289
300
|
elif capability // 10 >= 10:
|
|
290
301
|
passes.ttgpuir.add_fuse_nested_loops(pm)
|
|
291
302
|
passes.common.add_canonicalizer(pm)
|
|
292
303
|
passes.common.add_licm(pm)
|
|
293
304
|
passes.ttgpuir.add_optimize_accumulator_init(pm)
|
|
305
|
+
passes.ttgpuir.add_ws_task_partition(pm, opt.num_consumer_groups)
|
|
306
|
+
passes.ttgpuir.add_taskid_propagate(pm, opt.num_consumer_groups)
|
|
307
|
+
passes.ttgpuir.add_ws_data_partition(pm, opt.num_consumer_groups)
|
|
308
|
+
passes.ttgpuir.add_ws_code_partition(pm, opt.num_buffers_warp_spec, opt.num_consumer_groups,
|
|
309
|
+
opt.reg_dec_producer, opt.reg_inc_consumer)
|
|
294
310
|
passes.ttgpuir.add_pipeline(pm, opt.num_stages, dump_enabled)
|
|
295
311
|
passes.ttgpuir.add_combine_tensor_select_and_if(pm)
|
|
296
312
|
nvidia.passes.ttnvgpuir.add_promote_lhs_to_tmem(pm)
|
|
297
313
|
nvidia.passes.ttnvgpuir.add_keep_acc_in_tmem(pm)
|
|
314
|
+
passes.ttgpuir.add_ws_lowering(pm, opt.num_consumer_groups)
|
|
298
315
|
passes.common.add_canonicalizer(pm)
|
|
299
316
|
else:
|
|
300
317
|
passes.common.add_licm(pm)
|
|
@@ -310,6 +327,8 @@ class CUDABackend(BaseBackend):
|
|
|
310
327
|
nvidia.passes.ttnvgpuir.add_fence_insertion(pm)
|
|
311
328
|
nvidia.passes.ttnvgpuir.add_tma_lowering(pm)
|
|
312
329
|
passes.common.add_canonicalizer(pm)
|
|
330
|
+
if capability // 10 >= 9:
|
|
331
|
+
passes.ttgpuir.add_ws_canonicalization(pm, opt.num_consumer_groups)
|
|
313
332
|
pm.run(mod)
|
|
314
333
|
metadata["cluster_dims"] = (cluster_info.clusterDimX, cluster_info.clusterDimY, cluster_info.clusterDimZ)
|
|
315
334
|
return mod
|
|
@@ -416,7 +435,9 @@ class CUDABackend(BaseBackend):
|
|
|
416
435
|
opt_level = ['--opt-level', '0'] if os.environ.get("DISABLE_PTXAS_OPT", "0") == "1" else []
|
|
417
436
|
ptxas_cmd = [ptxas, *line_info, *fmad, '-v', *opt_level, f'--gpu-name={arch}', fsrc.name, '-o', fbin]
|
|
418
437
|
try:
|
|
419
|
-
|
|
438
|
+
# close_fds=True on Windows and False on Linux, see https://github.com/triton-lang/triton/pull/4357
|
|
439
|
+
# On Windows, both stdout and stderr need to be redirected to flog
|
|
440
|
+
subprocess.run(ptxas_cmd, check=True, close_fds=True if os.name == 'nt' else False, stdout=flog, stderr=flog)
|
|
420
441
|
except subprocess.CalledProcessError as e:
|
|
421
442
|
with open(flog.name) as log_file:
|
|
422
443
|
log = log_file.read()
|
triton/backends/nvidia/driver.c
CHANGED
triton/backends/nvidia/driver.py
CHANGED
|
@@ -74,6 +74,10 @@ def compile_module_from_src(src, name):
|
|
|
74
74
|
so = _build(name, src_path, tmpdir, library_dirs(), include_dir, libraries)
|
|
75
75
|
with open(so, "rb") as f:
|
|
76
76
|
cache_path = cache.put(f.read(), f"{name}.{ext}", binary=True)
|
|
77
|
+
|
|
78
|
+
# Loading module with relative path may cause error
|
|
79
|
+
cache_path = os.path.abspath(cache_path)
|
|
80
|
+
|
|
77
81
|
import importlib.util
|
|
78
82
|
spec = importlib.util.spec_from_file_location(name, cache_path)
|
|
79
83
|
mod = importlib.util.module_from_spec(spec)
|
|
@@ -204,6 +208,7 @@ def make_launcher(constants, signature):
|
|
|
204
208
|
params = [f"&arg{i}" for i, ty in signature.items() if ty != "constexpr"]
|
|
205
209
|
params.append("&global_scratch")
|
|
206
210
|
src = f"""
|
|
211
|
+
#define _CRT_SECURE_NO_WARNINGS
|
|
207
212
|
#include \"cuda.h\"
|
|
208
213
|
#include <stdbool.h>
|
|
209
214
|
#define PY_SSIZE_T_CLEAN
|
triton/runtime/autotuner.py
CHANGED
|
@@ -36,7 +36,10 @@ class Autotuner(KernelInterface):
|
|
|
36
36
|
'prune_num_stages_by'(optional): a function used to prune num_stages. It takes configs:List[Config] as its input, and returns pruned configs.
|
|
37
37
|
"""
|
|
38
38
|
if not configs:
|
|
39
|
-
self.configs = [Config({}, num_warps=4, num_stages=3, num_ctas=1)]
|
|
39
|
+
self.configs = [
|
|
40
|
+
Config({}, num_warps=4, num_stages=3, num_ctas=1, num_buffers_warp_spec=0, num_consumer_groups=0,
|
|
41
|
+
reg_dec_producer=0, reg_inc_consumer=0)
|
|
42
|
+
]
|
|
40
43
|
else:
|
|
41
44
|
self.configs = configs
|
|
42
45
|
self.keys = key
|
|
@@ -269,11 +272,16 @@ class Config:
|
|
|
269
272
|
function are args.
|
|
270
273
|
"""
|
|
271
274
|
|
|
272
|
-
def __init__(self, kwargs, num_warps=4, num_stages=3, num_ctas=1, maxnreg=None, pre_hook=None):
|
|
275
|
+
def __init__(self, kwargs, num_warps=4, num_stages=3, num_ctas=1, num_buffers_warp_spec=0, num_consumer_groups=0,
|
|
276
|
+
reg_dec_producer=0, reg_inc_consumer=0, maxnreg=None, pre_hook=None):
|
|
273
277
|
self.kwargs = kwargs
|
|
274
278
|
self.num_warps = num_warps
|
|
275
279
|
self.num_ctas = num_ctas
|
|
276
280
|
self.num_stages = num_stages
|
|
281
|
+
self.num_buffers_warp_spec = num_buffers_warp_spec
|
|
282
|
+
self.num_consumer_groups = num_consumer_groups
|
|
283
|
+
self.reg_dec_producer = reg_dec_producer
|
|
284
|
+
self.reg_inc_consumer = reg_inc_consumer
|
|
277
285
|
self.maxnreg = maxnreg
|
|
278
286
|
self.pre_hook = pre_hook
|
|
279
287
|
|
|
@@ -285,6 +293,10 @@ class Config:
|
|
|
285
293
|
("num_warps", self.num_warps),
|
|
286
294
|
("num_ctas", self.num_ctas),
|
|
287
295
|
("num_stages", self.num_stages),
|
|
296
|
+
("num_buffers_warp_spec", self.num_buffers_warp_spec),
|
|
297
|
+
("num_consumer_groups", self.num_consumer_groups),
|
|
298
|
+
("reg_dec_producer", self.reg_dec_producer),
|
|
299
|
+
("reg_inc_consumer", self.reg_inc_consumer),
|
|
288
300
|
("maxnreg", self.maxnreg),
|
|
289
301
|
) if v is not None
|
|
290
302
|
}
|
|
@@ -297,6 +309,10 @@ class Config:
|
|
|
297
309
|
res.append(f"num_warps: {self.num_warps}")
|
|
298
310
|
res.append(f"num_ctas: {self.num_ctas}")
|
|
299
311
|
res.append(f"num_stages: {self.num_stages}")
|
|
312
|
+
res.append(f"num_buffers_warp_spec: {self.num_buffers_warp_spec}")
|
|
313
|
+
res.append(f"num_consumer_groups: {self.num_consumer_groups}")
|
|
314
|
+
res.append(f"reg_dec_producer: {self.reg_dec_producer}")
|
|
315
|
+
res.append(f"reg_inc_consumer: {self.reg_inc_consumer}")
|
|
300
316
|
res.append(f"maxnreg: {self.maxnreg}")
|
|
301
317
|
return ", ".join(res)
|
|
302
318
|
|
triton/runtime/build.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import functools
|
|
1
2
|
import sysconfig
|
|
2
3
|
import os
|
|
3
4
|
import shutil
|
|
@@ -7,8 +8,12 @@ if os.name == "nt":
|
|
|
7
8
|
from triton.windows_utils import find_msvc_winsdk, find_python
|
|
8
9
|
|
|
9
10
|
|
|
11
|
+
@functools.cache
|
|
10
12
|
def get_cc():
|
|
11
13
|
cc = os.environ.get("CC")
|
|
14
|
+
if cc is None:
|
|
15
|
+
# Find and check MSVC and Windows SDK from environment variables set by Launch-VsDevShell.ps1 or VsDevCmd.bat
|
|
16
|
+
cc, _, _ = find_msvc_winsdk(env_only=True)
|
|
12
17
|
if cc is None:
|
|
13
18
|
# Bundled TinyCC
|
|
14
19
|
cc = os.path.join(sysconfig.get_paths()["platlib"], "triton", "runtime", "tcc", "tcc.exe")
|
|
@@ -30,6 +35,11 @@ def is_msvc(cc):
|
|
|
30
35
|
return cc == "cl" or cc == "cl.exe"
|
|
31
36
|
|
|
32
37
|
|
|
38
|
+
def is_clang(cc):
|
|
39
|
+
cc = os.path.basename(cc).lower()
|
|
40
|
+
return cc == "clang" or cc == "clang.exe"
|
|
41
|
+
|
|
42
|
+
|
|
33
43
|
def _cc_cmd(cc, src, out, include_dirs, library_dirs, libraries):
|
|
34
44
|
if is_msvc(cc):
|
|
35
45
|
out_base = os.path.splitext(out)[0]
|
|
@@ -44,7 +54,10 @@ def _cc_cmd(cc, src, out, include_dirs, library_dirs, libraries):
|
|
|
44
54
|
cc_cmd += [f"/PDB:{out_base + '.pdb'}"]
|
|
45
55
|
else:
|
|
46
56
|
# for -Wno-psabi, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111047
|
|
47
|
-
cc_cmd = [cc, src, "-O3", "-shared", "-fPIC", "-Wno-psabi", "-o", out]
|
|
57
|
+
cc_cmd = [cc, src, "-O3", "-shared", "-Wno-psabi", "-o", out]
|
|
58
|
+
if not (os.name == "nt" and is_clang(cc)):
|
|
59
|
+
# Clang does not support -fPIC on Windows
|
|
60
|
+
cc_cmd += ["-fPIC"]
|
|
48
61
|
cc_cmd += [f'-l{lib}' for lib in libraries]
|
|
49
62
|
cc_cmd += [f"-L{dir}" for dir in library_dirs]
|
|
50
63
|
cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None]
|
|
@@ -75,9 +88,15 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
|
|
|
75
88
|
if "python3" not in libraries:
|
|
76
89
|
libraries += ["python3"]
|
|
77
90
|
if is_msvc(cc):
|
|
78
|
-
msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
|
|
91
|
+
_, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
|
|
79
92
|
include_dirs += msvc_winsdk_inc_dirs
|
|
80
93
|
library_dirs += msvc_winsdk_lib_dirs
|
|
81
94
|
cc_cmd = _cc_cmd(cc, src, so, include_dirs, library_dirs, libraries)
|
|
82
|
-
|
|
95
|
+
|
|
96
|
+
try:
|
|
97
|
+
ret = subprocess.check_call(cc_cmd)
|
|
98
|
+
except Exception as e:
|
|
99
|
+
print("Failed to compile. cc_cmd:", cc_cmd)
|
|
100
|
+
raise e
|
|
101
|
+
|
|
83
102
|
return so
|
triton/windows_utils.py
CHANGED
|
@@ -57,12 +57,31 @@ def check_msvc(msvc_base_path: Path, version: str) -> bool:
|
|
|
57
57
|
return all(
|
|
58
58
|
x.exists()
|
|
59
59
|
for x in [
|
|
60
|
+
msvc_base_path / version / "bin" / "Hostx64" / "x64" / "cl.exe",
|
|
60
61
|
msvc_base_path / version / "include" / "vcruntime.h",
|
|
61
62
|
msvc_base_path / version / "lib" / "x64" / "vcruntime.lib",
|
|
62
63
|
]
|
|
63
64
|
)
|
|
64
65
|
|
|
65
66
|
|
|
67
|
+
def find_msvc_env() -> tuple[Optional[Path], Optional[str]]:
|
|
68
|
+
msvc_base_path = os.getenv("VCINSTALLDIR")
|
|
69
|
+
if msvc_base_path is None:
|
|
70
|
+
return None, None
|
|
71
|
+
msvc_base_path = Path(msvc_base_path) / "Tools" / "MSVC"
|
|
72
|
+
|
|
73
|
+
version = os.getenv("VCToolsVersion")
|
|
74
|
+
if not check_msvc(msvc_base_path, version):
|
|
75
|
+
warnings.warn(
|
|
76
|
+
f"Environment variables VCINSTALLDIR = {os.getenv('VCINSTALLDIR')}, "
|
|
77
|
+
f"VCToolsVersion = {os.getenv('VCToolsVersion')} are set, "
|
|
78
|
+
"but this MSVC installation is incomplete."
|
|
79
|
+
)
|
|
80
|
+
return None, None
|
|
81
|
+
|
|
82
|
+
return msvc_base_path, version
|
|
83
|
+
|
|
84
|
+
|
|
66
85
|
def find_msvc_vswhere() -> tuple[Optional[Path], Optional[str]]:
|
|
67
86
|
vswhere_path = find_in_program_files(
|
|
68
87
|
r"Microsoft Visual Studio\Installer\vswhere.exe"
|
|
@@ -144,20 +163,28 @@ def find_msvc_hardcoded() -> tuple[Optional[Path], Optional[str]]:
|
|
|
144
163
|
return None, None
|
|
145
164
|
|
|
146
165
|
|
|
147
|
-
def find_msvc() -> tuple[list[str], list[str]]:
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
166
|
+
def find_msvc(env_only: bool) -> tuple[Optional[str], list[str], list[str]]:
|
|
167
|
+
if env_only:
|
|
168
|
+
fs = [find_msvc_env]
|
|
169
|
+
else:
|
|
170
|
+
fs = [
|
|
171
|
+
find_msvc_env,
|
|
172
|
+
find_msvc_vswhere,
|
|
173
|
+
find_msvc_envpath,
|
|
174
|
+
find_msvc_hardcoded,
|
|
175
|
+
]
|
|
176
|
+
for f in fs:
|
|
177
|
+
msvc_base_path, version = f()
|
|
178
|
+
if msvc_base_path:
|
|
179
|
+
return (
|
|
180
|
+
str(msvc_base_path / version / "bin" / "Hostx64" / "x64" / "cl.exe"),
|
|
181
|
+
[str(msvc_base_path / version / "include")],
|
|
182
|
+
[str(msvc_base_path / version / "lib" / "x64")],
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
if not env_only:
|
|
154
186
|
warnings.warn("Failed to find MSVC.")
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
return (
|
|
158
|
-
[str(msvc_base_path / version / "include")],
|
|
159
|
-
[str(msvc_base_path / version / "lib" / "x64")],
|
|
160
|
-
)
|
|
187
|
+
return None, [], []
|
|
161
188
|
|
|
162
189
|
|
|
163
190
|
def check_winsdk(winsdk_base_path: Path, version: str) -> bool:
|
|
@@ -170,6 +197,26 @@ def check_winsdk(winsdk_base_path: Path, version: str) -> bool:
|
|
|
170
197
|
)
|
|
171
198
|
|
|
172
199
|
|
|
200
|
+
def find_winsdk_env() -> tuple[Optional[Path], Optional[str]]:
|
|
201
|
+
winsdk_base_path = os.getenv("WindowsSdkDir")
|
|
202
|
+
if winsdk_base_path is None:
|
|
203
|
+
return None, None
|
|
204
|
+
winsdk_base_path = Path(winsdk_base_path)
|
|
205
|
+
|
|
206
|
+
version = os.getenv("WindowsSDKVersion")
|
|
207
|
+
if version:
|
|
208
|
+
version = version.rstrip("\\")
|
|
209
|
+
if not check_winsdk(winsdk_base_path, version):
|
|
210
|
+
warnings.warn(
|
|
211
|
+
f"Environment variables WindowsSdkDir = {os.getenv('WindowsSdkDir')}, "
|
|
212
|
+
f"WindowsSDKVersion = {os.getenv('WindowsSDKVersion')} are set, "
|
|
213
|
+
"but this Windows SDK installation is incomplete."
|
|
214
|
+
)
|
|
215
|
+
return None, None
|
|
216
|
+
|
|
217
|
+
return winsdk_base_path, version
|
|
218
|
+
|
|
219
|
+
|
|
173
220
|
def find_winsdk_registry() -> tuple[Optional[Path], Optional[str]]:
|
|
174
221
|
try:
|
|
175
222
|
reg = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
|
|
@@ -212,32 +259,46 @@ def find_winsdk_hardcoded() -> tuple[Optional[Path], Optional[str]]:
|
|
|
212
259
|
return winsdk_base_path, version
|
|
213
260
|
|
|
214
261
|
|
|
215
|
-
def find_winsdk() -> tuple[list[str], list[str]]:
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
262
|
+
def find_winsdk(env_only: bool) -> tuple[list[str], list[str]]:
|
|
263
|
+
if env_only:
|
|
264
|
+
fs = [find_winsdk_env]
|
|
265
|
+
else:
|
|
266
|
+
fs = [
|
|
267
|
+
find_winsdk_env,
|
|
268
|
+
find_winsdk_registry,
|
|
269
|
+
find_winsdk_hardcoded,
|
|
270
|
+
]
|
|
271
|
+
for f in fs:
|
|
272
|
+
winsdk_base_path, version = f()
|
|
273
|
+
if winsdk_base_path:
|
|
274
|
+
return (
|
|
275
|
+
[
|
|
276
|
+
str(winsdk_base_path / "Include" / version / "shared"),
|
|
277
|
+
str(winsdk_base_path / "Include" / version / "ucrt"),
|
|
278
|
+
str(winsdk_base_path / "Include" / version / "um"),
|
|
279
|
+
],
|
|
280
|
+
[
|
|
281
|
+
str(winsdk_base_path / "Lib" / version / "ucrt" / "x64"),
|
|
282
|
+
str(winsdk_base_path / "Lib" / version / "um" / "x64"),
|
|
283
|
+
],
|
|
284
|
+
)
|
|
285
|
+
|
|
286
|
+
if not env_only:
|
|
220
287
|
warnings.warn("Failed to find Windows SDK.")
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
return (
|
|
224
|
-
[
|
|
225
|
-
str(winsdk_base_path / "Include" / version / "shared"),
|
|
226
|
-
str(winsdk_base_path / "Include" / version / "ucrt"),
|
|
227
|
-
str(winsdk_base_path / "Include" / version / "um"),
|
|
228
|
-
],
|
|
229
|
-
[
|
|
230
|
-
str(winsdk_base_path / "Lib" / version / "ucrt" / "x64"),
|
|
231
|
-
str(winsdk_base_path / "Lib" / version / "um" / "x64"),
|
|
232
|
-
],
|
|
233
|
-
)
|
|
288
|
+
return [], []
|
|
234
289
|
|
|
235
290
|
|
|
236
291
|
@functools.cache
|
|
237
|
-
def find_msvc_winsdk() -> tuple[list[str], list[str]]:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
292
|
+
def find_msvc_winsdk(
|
|
293
|
+
env_only: bool = False,
|
|
294
|
+
) -> tuple[Optional[str], list[str], list[str]]:
|
|
295
|
+
msvc_bin_path, msvc_inc_dirs, msvc_lib_dirs = find_msvc(env_only)
|
|
296
|
+
winsdk_inc_dirs, winsdk_lib_dirs = find_winsdk(env_only)
|
|
297
|
+
return (
|
|
298
|
+
msvc_bin_path,
|
|
299
|
+
msvc_inc_dirs + winsdk_inc_dirs,
|
|
300
|
+
msvc_lib_dirs + winsdk_lib_dirs,
|
|
301
|
+
)
|
|
241
302
|
|
|
242
303
|
|
|
243
304
|
@functools.cache
|
|
@@ -3,12 +3,12 @@ triton/_internal_testing.py,sha256=OBY28huiEWItqGgiukgZzHLLaSbS8yj9kdhn_u562Yg,5
|
|
|
3
3
|
triton/_utils.py,sha256=5RiCLwW14w0Q3mdZ-9yz-VO5KiSexNj9xeDt4gaNsvE,1014
|
|
4
4
|
triton/errors.py,sha256=8WfnuRKLG578mgY6cBA3ECruVMf9ULEKFNgRcJ6IhWM,89
|
|
5
5
|
triton/testing.py,sha256=ivFf1Fq9frmfVahaVUp0bgJxmvVZNACZfj3Sai6zfAs,20048
|
|
6
|
-
triton/windows_utils.py,sha256=
|
|
7
|
-
triton/_C/libtriton.pyd,sha256=
|
|
6
|
+
triton/windows_utils.py,sha256=YUl-1QbLINQRaAAMNYPjLiTFZlVsCNi9mTFck5aemwk,12778
|
|
7
|
+
triton/_C/libtriton.pyd,sha256=Cb3yDVnYOY2lCtZIIcJzHwNUQCkt7WHuq0nognPj2FU,87272960
|
|
8
8
|
triton/backends/__init__.py,sha256=opAo_vgEMt3tLO_bYFrYGksnIu0qohbmyuu_s3-rNAs,1595
|
|
9
9
|
triton/backends/compiler.py,sha256=ymaG0kpveAuESbQ9QZ0RyXjr0Aq4el_G5XGYogJ2gNA,3588
|
|
10
10
|
triton/backends/driver.py,sha256=AN60upJlPgia0JwvZ8vIVgLITNPuI0fdz8zMIIHPpF4,1450
|
|
11
|
-
triton/backends/amd/compiler.py,sha256=
|
|
11
|
+
triton/backends/amd/compiler.py,sha256=27jurEV7tH5J6BHtOXrdPJyCMYIPHo5G4Op_O66gv4E,19135
|
|
12
12
|
triton/backends/amd/driver.c,sha256=obiiiPndny5NyhUcJ8iyrVHrXU1ruLpLGd_LgaKQEbU,8459
|
|
13
13
|
triton/backends/amd/driver.py,sha256=p8FcTiAq-829p2gRQZ5sPE1_d1SQQ_sOhb5WxI6rXME,20379
|
|
14
14
|
triton/backends/amd/include/hip/channel_descriptor.h,sha256=gTYe7SzIg-m3ThOQY2vr5Rh6-uWvUP_d37v8F4T2Q14,1773
|
|
@@ -112,9 +112,9 @@ triton/backends/amd/lib/asanrtl.bc,sha256=1xv2RlU3WvbdsghHlmhwiHewGM2B5dKts5bERM
|
|
|
112
112
|
triton/backends/amd/lib/ockl.bc,sha256=wQKCzkKukIHbu0lyjKUYlhndc7S27xto6L54J0Bn-C0,246124
|
|
113
113
|
triton/backends/amd/lib/ocml.bc,sha256=UPNTXW0gCXUNB-c6orSYwb-mz9_mjUc7zny_vfFza44,205964
|
|
114
114
|
triton/backends/nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
|
-
triton/backends/nvidia/compiler.py,sha256=
|
|
116
|
-
triton/backends/nvidia/driver.c,sha256=
|
|
117
|
-
triton/backends/nvidia/driver.py,sha256=
|
|
115
|
+
triton/backends/nvidia/compiler.py,sha256=4BnLUapsoh8lsQPYlm-_ZCyKvU3DgJY3NgDMo8leV9c,19996
|
|
116
|
+
triton/backends/nvidia/driver.c,sha256=HqtiJDPRxVav9pHl_swBja28RasjHZ7oFOpWKMT627c,18912
|
|
117
|
+
triton/backends/nvidia/driver.py,sha256=kMMVl4SMbJc6obI2Brxoj7RNJt6YBGKYSB56dPeUY6w,21037
|
|
118
118
|
triton/backends/nvidia/bin/ptxas.exe,sha256=iCva9hIYg-Q2NybchwaQJFkwDzNiliFOTDdZrHPLb6A,24732160
|
|
119
119
|
triton/backends/nvidia/include/cuda.h,sha256=Fn44OjeRImxegJ39apYUspseEfTWNGwpqSGUOnHj5WY,1183268
|
|
120
120
|
triton/backends/nvidia/lib/libdevice.10.bc,sha256=XC-uN8huaMOjhgWpX1EtfRLV89uYYxC-R_VzBKpype4,473728
|
|
@@ -141,8 +141,8 @@ triton/language/extra/hip/__init__.py,sha256=ieSER4LeX9_0horChGUUVwpuKAprkuka8uG
|
|
|
141
141
|
triton/language/extra/hip/libdevice.py,sha256=EVraUfeXzQmN3F5Lleg2mohVcbFWOWlLaAH1nkbqtV4,16841
|
|
142
142
|
triton/runtime/__init__.py,sha256=mKL5cqIBDUw2WO80NRCh4s1G8KYaqgM59TTAbTkPPjQ,621
|
|
143
143
|
triton/runtime/_allocation.py,sha256=zaW4B7I7c-2rkVuN7IZaUB6IQSI1t4FvnTPZH-r7DTk,798
|
|
144
|
-
triton/runtime/autotuner.py,sha256=
|
|
145
|
-
triton/runtime/build.py,sha256=
|
|
144
|
+
triton/runtime/autotuner.py,sha256=0ku0wjPo8xOvom6P4uEVZHsgPkxAFOqP1LjUVue0HLM,17854
|
|
145
|
+
triton/runtime/build.py,sha256=6akPf7jgSRDWqcDGvJXxxiO0MzH6503YrM7xmr24l-Y,3825
|
|
146
146
|
triton/runtime/cache.py,sha256=uoU1UH3HPxkxT0r-69HIZgLBls3T-TDxJedRULX9lbM,10583
|
|
147
147
|
triton/runtime/driver.py,sha256=VZ-883Xri71R72lHB6usIpLo3gGLbZJkAlLP3ewWSpc,1509
|
|
148
148
|
triton/runtime/errors.py,sha256=CwfJXciwel_-K3BfQfKUpLPDWrSyTnGsfJkqJojrdfQ,1052
|
|
@@ -245,9 +245,9 @@ triton/tools/disasm.py,sha256=BBO4bALdLcWgWDLhQdYHLlTx3oo8g_d8maeE_Uu-FmU,5088
|
|
|
245
245
|
triton/tools/experimental_descriptor.py,sha256=0Wqy96Cc6YLh9o0eTknW-Lfvha6lfRSfe8bswkcPHMs,1260
|
|
246
246
|
triton/tools/link.py,sha256=u7qtfZRLriZkAMEGNvj8YF-k1cthmLL7BwHYqBgT63E,11871
|
|
247
247
|
triton/tools/mxfp.py,sha256=YQdpBrGkOVNOtnLeRjMCeVFHWkSwUubGeWsItIjO8TU,11737
|
|
248
|
-
triton/tools/extra/cuda/compile.c,sha256=
|
|
248
|
+
triton/tools/extra/cuda/compile.c,sha256=TdIENsqk6wrvv1C4Mk-sq9keXe3SJuMQcf0UpxmjNZk,2153
|
|
249
249
|
triton/tools/extra/cuda/compile.h,sha256=n9QKIFZTL4RSsiXtAxBP9XGSnxjyaevQQ9bBpwDsvAg,332
|
|
250
|
-
triton_windows-3.3.
|
|
251
|
-
triton_windows-3.3.
|
|
252
|
-
triton_windows-3.3.
|
|
253
|
-
triton_windows-3.3.
|
|
250
|
+
triton_windows-3.3.0.post19.dist-info/METADATA,sha256=n16mof1EMhLJdnP2tX85LoRgmCIpznA9Bx1IJBS-O2w,1627
|
|
251
|
+
triton_windows-3.3.0.post19.dist-info/WHEEL,sha256=2SI2v6oeF8cXA_fO3HjdyTywO2Wxj6SOtX9BZdWd5hs,101
|
|
252
|
+
triton_windows-3.3.0.post19.dist-info/top_level.txt,sha256=KhMzHYsArnZ3IkjAQ-xLnx1n_FjvEpJNelg2xPiDl-U,254
|
|
253
|
+
triton_windows-3.3.0.post19.dist-info/RECORD,,
|
{triton_windows-3.3.0a0.post17.dist-info → triton_windows-3.3.0.post19.dist-info}/top_level.txt
RENAMED
|
File without changes
|