triton-windows 3.3.0a0.post16__cp311-cp311-win_amd64.whl → 3.3.0a0.post18__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/backends/amd/compiler.py +4 -0
- triton/backends/nvidia/compiler.py +22 -1
- triton/backends/nvidia/driver.py +4 -0
- triton/runtime/autotuner.py +18 -2
- triton/runtime/build.py +13 -2
- triton/runtime/cache.py +9 -1
- triton/tools/extra/cuda/compile.c +1 -0
- triton/windows_utils.py +96 -35
- {triton_windows-3.3.0a0.post16.dist-info → triton_windows-3.3.0a0.post18.dist-info}/METADATA +1 -1
- {triton_windows-3.3.0a0.post16.dist-info → triton_windows-3.3.0a0.post18.dist-info}/RECORD +13 -13
- {triton_windows-3.3.0a0.post16.dist-info → triton_windows-3.3.0a0.post18.dist-info}/WHEEL +1 -1
- {triton_windows-3.3.0a0.post16.dist-info → triton_windows-3.3.0a0.post18.dist-info}/top_level.txt +0 -0
triton/_C/libtriton.pyd
CHANGED
|
Binary file
|
triton/backends/amd/compiler.py
CHANGED
|
@@ -28,6 +28,10 @@ class HIPOptions:
|
|
|
28
28
|
waves_per_eu: int = 1
|
|
29
29
|
num_stages: int = 2
|
|
30
30
|
num_ctas: int = 1
|
|
31
|
+
num_buffers_warp_spec: int = 0
|
|
32
|
+
num_consumer_groups: int = 0
|
|
33
|
+
reg_dec_producer: int = 0
|
|
34
|
+
reg_inc_consumer: int = 0
|
|
31
35
|
extern_libs: dict = None
|
|
32
36
|
cluster_dims: tuple = (1, 1, 1)
|
|
33
37
|
debug: bool = False
|
|
@@ -141,6 +141,10 @@ class CUDAOptions:
|
|
|
141
141
|
num_warps: int = 4
|
|
142
142
|
num_ctas: int = 1
|
|
143
143
|
num_stages: int = 3
|
|
144
|
+
num_buffers_warp_spec: int = 0
|
|
145
|
+
num_consumer_groups: int = 0
|
|
146
|
+
reg_dec_producer: int = 0
|
|
147
|
+
reg_inc_consumer: int = 0
|
|
144
148
|
# maxnreg corresponds to the ptx parameter .maxnreg, which controls the
|
|
145
149
|
# maximum number of 32-bit registers used by one thread.
|
|
146
150
|
maxnreg: Optional[int] = None
|
|
@@ -285,16 +289,29 @@ class CUDABackend(BaseBackend):
|
|
|
285
289
|
passes.ttgpuir.add_optimize_accumulator_init(pm)
|
|
286
290
|
passes.common.add_canonicalizer(pm)
|
|
287
291
|
passes.ttgpuir.add_combine_tensor_select_and_if(pm)
|
|
292
|
+
passes.ttgpuir.add_ws_task_partition(pm, opt.num_consumer_groups)
|
|
293
|
+
passes.ttgpuir.add_taskid_propagate(pm, opt.num_consumer_groups)
|
|
294
|
+
passes.ttgpuir.add_ws_data_partition(pm, opt.num_consumer_groups)
|
|
295
|
+
passes.ttgpuir.add_ws_code_partition(pm, opt.num_buffers_warp_spec, opt.num_consumer_groups,
|
|
296
|
+
opt.reg_dec_producer, opt.reg_inc_consumer)
|
|
288
297
|
passes.ttgpuir.add_pipeline(pm, opt.num_stages, dump_enabled)
|
|
298
|
+
passes.ttgpuir.add_ping_pong_sync(pm, opt.num_consumer_groups)
|
|
299
|
+
passes.ttgpuir.add_ws_lowering(pm, opt.num_consumer_groups)
|
|
289
300
|
elif capability // 10 >= 10:
|
|
290
301
|
passes.ttgpuir.add_fuse_nested_loops(pm)
|
|
291
302
|
passes.common.add_canonicalizer(pm)
|
|
292
303
|
passes.common.add_licm(pm)
|
|
293
304
|
passes.ttgpuir.add_optimize_accumulator_init(pm)
|
|
305
|
+
passes.ttgpuir.add_ws_task_partition(pm, opt.num_consumer_groups)
|
|
306
|
+
passes.ttgpuir.add_taskid_propagate(pm, opt.num_consumer_groups)
|
|
307
|
+
passes.ttgpuir.add_ws_data_partition(pm, opt.num_consumer_groups)
|
|
308
|
+
passes.ttgpuir.add_ws_code_partition(pm, opt.num_buffers_warp_spec, opt.num_consumer_groups,
|
|
309
|
+
opt.reg_dec_producer, opt.reg_inc_consumer)
|
|
294
310
|
passes.ttgpuir.add_pipeline(pm, opt.num_stages, dump_enabled)
|
|
295
311
|
passes.ttgpuir.add_combine_tensor_select_and_if(pm)
|
|
296
312
|
nvidia.passes.ttnvgpuir.add_promote_lhs_to_tmem(pm)
|
|
297
313
|
nvidia.passes.ttnvgpuir.add_keep_acc_in_tmem(pm)
|
|
314
|
+
passes.ttgpuir.add_ws_lowering(pm, opt.num_consumer_groups)
|
|
298
315
|
passes.common.add_canonicalizer(pm)
|
|
299
316
|
else:
|
|
300
317
|
passes.common.add_licm(pm)
|
|
@@ -310,6 +327,8 @@ class CUDABackend(BaseBackend):
|
|
|
310
327
|
nvidia.passes.ttnvgpuir.add_fence_insertion(pm)
|
|
311
328
|
nvidia.passes.ttnvgpuir.add_tma_lowering(pm)
|
|
312
329
|
passes.common.add_canonicalizer(pm)
|
|
330
|
+
if capability // 10 >= 9:
|
|
331
|
+
passes.ttgpuir.add_ws_canonicalization(pm, opt.num_consumer_groups)
|
|
313
332
|
pm.run(mod)
|
|
314
333
|
metadata["cluster_dims"] = (cluster_info.clusterDimX, cluster_info.clusterDimY, cluster_info.clusterDimZ)
|
|
315
334
|
return mod
|
|
@@ -416,7 +435,9 @@ class CUDABackend(BaseBackend):
|
|
|
416
435
|
opt_level = ['--opt-level', '0'] if os.environ.get("DISABLE_PTXAS_OPT", "0") == "1" else []
|
|
417
436
|
ptxas_cmd = [ptxas, *line_info, *fmad, '-v', *opt_level, f'--gpu-name={arch}', fsrc.name, '-o', fbin]
|
|
418
437
|
try:
|
|
419
|
-
|
|
438
|
+
# close_fds=True on Windows and False on Linux, see https://github.com/triton-lang/triton/pull/4357
|
|
439
|
+
# On Windows, both stdout and stderr need to be redirected to flog
|
|
440
|
+
subprocess.run(ptxas_cmd, check=True, close_fds=True if os.name == 'nt' else False, stdout=flog, stderr=flog)
|
|
420
441
|
except subprocess.CalledProcessError as e:
|
|
421
442
|
with open(flog.name) as log_file:
|
|
422
443
|
log = log_file.read()
|
triton/backends/nvidia/driver.py
CHANGED
|
@@ -74,6 +74,10 @@ def compile_module_from_src(src, name):
|
|
|
74
74
|
so = _build(name, src_path, tmpdir, library_dirs(), include_dir, libraries)
|
|
75
75
|
with open(so, "rb") as f:
|
|
76
76
|
cache_path = cache.put(f.read(), f"{name}.{ext}", binary=True)
|
|
77
|
+
|
|
78
|
+
# Loading module with relative path may cause error
|
|
79
|
+
cache_path = os.path.abspath(cache_path)
|
|
80
|
+
|
|
77
81
|
import importlib.util
|
|
78
82
|
spec = importlib.util.spec_from_file_location(name, cache_path)
|
|
79
83
|
mod = importlib.util.module_from_spec(spec)
|
triton/runtime/autotuner.py
CHANGED
|
@@ -36,7 +36,10 @@ class Autotuner(KernelInterface):
|
|
|
36
36
|
'prune_num_stages_by'(optional): a function used to prune num_stages. It takes configs:List[Config] as its input, and returns pruned configs.
|
|
37
37
|
"""
|
|
38
38
|
if not configs:
|
|
39
|
-
self.configs = [
|
|
39
|
+
self.configs = [
|
|
40
|
+
Config({}, num_warps=4, num_stages=3, num_ctas=1, num_buffers_warp_spec=0, num_consumer_groups=0,
|
|
41
|
+
reg_dec_producer=0, reg_inc_consumer=0)
|
|
42
|
+
]
|
|
40
43
|
else:
|
|
41
44
|
self.configs = configs
|
|
42
45
|
self.keys = key
|
|
@@ -269,11 +272,16 @@ class Config:
|
|
|
269
272
|
function are args.
|
|
270
273
|
"""
|
|
271
274
|
|
|
272
|
-
def __init__(self, kwargs, num_warps=4, num_stages=3, num_ctas=1,
|
|
275
|
+
def __init__(self, kwargs, num_warps=4, num_stages=3, num_ctas=1, num_buffers_warp_spec=0, num_consumer_groups=0,
|
|
276
|
+
reg_dec_producer=0, reg_inc_consumer=0, maxnreg=None, pre_hook=None):
|
|
273
277
|
self.kwargs = kwargs
|
|
274
278
|
self.num_warps = num_warps
|
|
275
279
|
self.num_ctas = num_ctas
|
|
276
280
|
self.num_stages = num_stages
|
|
281
|
+
self.num_buffers_warp_spec = num_buffers_warp_spec
|
|
282
|
+
self.num_consumer_groups = num_consumer_groups
|
|
283
|
+
self.reg_dec_producer = reg_dec_producer
|
|
284
|
+
self.reg_inc_consumer = reg_inc_consumer
|
|
277
285
|
self.maxnreg = maxnreg
|
|
278
286
|
self.pre_hook = pre_hook
|
|
279
287
|
|
|
@@ -285,6 +293,10 @@ class Config:
|
|
|
285
293
|
("num_warps", self.num_warps),
|
|
286
294
|
("num_ctas", self.num_ctas),
|
|
287
295
|
("num_stages", self.num_stages),
|
|
296
|
+
("num_buffers_warp_spec", self.num_buffers_warp_spec),
|
|
297
|
+
("num_consumer_groups", self.num_consumer_groups),
|
|
298
|
+
("reg_dec_producer", self.reg_dec_producer),
|
|
299
|
+
("reg_inc_consumer", self.reg_inc_consumer),
|
|
288
300
|
("maxnreg", self.maxnreg),
|
|
289
301
|
) if v is not None
|
|
290
302
|
}
|
|
@@ -297,6 +309,10 @@ class Config:
|
|
|
297
309
|
res.append(f"num_warps: {self.num_warps}")
|
|
298
310
|
res.append(f"num_ctas: {self.num_ctas}")
|
|
299
311
|
res.append(f"num_stages: {self.num_stages}")
|
|
312
|
+
res.append(f"num_buffers_warp_spec: {self.num_buffers_warp_spec}")
|
|
313
|
+
res.append(f"num_consumer_groups: {self.num_consumer_groups}")
|
|
314
|
+
res.append(f"reg_dec_producer: {self.reg_dec_producer}")
|
|
315
|
+
res.append(f"reg_inc_consumer: {self.reg_inc_consumer}")
|
|
300
316
|
res.append(f"maxnreg: {self.maxnreg}")
|
|
301
317
|
return ", ".join(res)
|
|
302
318
|
|
triton/runtime/build.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import functools
|
|
1
2
|
import sysconfig
|
|
2
3
|
import os
|
|
3
4
|
import shutil
|
|
@@ -7,8 +8,12 @@ if os.name == "nt":
|
|
|
7
8
|
from triton.windows_utils import find_msvc_winsdk, find_python
|
|
8
9
|
|
|
9
10
|
|
|
11
|
+
@functools.cache
|
|
10
12
|
def get_cc():
|
|
11
13
|
cc = os.environ.get("CC")
|
|
14
|
+
if cc is None:
|
|
15
|
+
# Find and check MSVC and Windows SDK from environment variables set by Launch-VsDevShell.ps1 or VsDevCmd.bat
|
|
16
|
+
cc, _, _ = find_msvc_winsdk(env_only=True)
|
|
12
17
|
if cc is None:
|
|
13
18
|
# Bundled TinyCC
|
|
14
19
|
cc = os.path.join(sysconfig.get_paths()["platlib"], "triton", "runtime", "tcc", "tcc.exe")
|
|
@@ -75,9 +80,15 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
|
|
|
75
80
|
if "python3" not in libraries:
|
|
76
81
|
libraries += ["python3"]
|
|
77
82
|
if is_msvc(cc):
|
|
78
|
-
msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
|
|
83
|
+
_, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
|
|
79
84
|
include_dirs += msvc_winsdk_inc_dirs
|
|
80
85
|
library_dirs += msvc_winsdk_lib_dirs
|
|
81
86
|
cc_cmd = _cc_cmd(cc, src, so, include_dirs, library_dirs, libraries)
|
|
82
|
-
|
|
87
|
+
|
|
88
|
+
try:
|
|
89
|
+
ret = subprocess.check_call(cc_cmd)
|
|
90
|
+
except Exception as e:
|
|
91
|
+
print("Failed to compile. cc_cmd:", cc_cmd)
|
|
92
|
+
raise e
|
|
93
|
+
|
|
83
94
|
return so
|
triton/runtime/cache.py
CHANGED
|
@@ -131,7 +131,15 @@ class FileCacheManager(CacheManager):
|
|
|
131
131
|
f.write(data)
|
|
132
132
|
# Replace is guaranteed to be atomic on POSIX systems if it succeeds
|
|
133
133
|
# so filepath cannot see a partial write
|
|
134
|
-
|
|
134
|
+
try:
|
|
135
|
+
os.replace(temp_path, filepath)
|
|
136
|
+
except PermissionError:
|
|
137
|
+
# Ignore PermissionError on Windows because it happens when another process already
|
|
138
|
+
# put a file into the cache and locked it by opening it.
|
|
139
|
+
if os.name == "nt":
|
|
140
|
+
os.remove(temp_path)
|
|
141
|
+
else:
|
|
142
|
+
raise
|
|
135
143
|
os.removedirs(temp_dir)
|
|
136
144
|
return filepath
|
|
137
145
|
|
triton/windows_utils.py
CHANGED
|
@@ -57,12 +57,31 @@ def check_msvc(msvc_base_path: Path, version: str) -> bool:
|
|
|
57
57
|
return all(
|
|
58
58
|
x.exists()
|
|
59
59
|
for x in [
|
|
60
|
+
msvc_base_path / version / "bin" / "Hostx64" / "x64" / "cl.exe",
|
|
60
61
|
msvc_base_path / version / "include" / "vcruntime.h",
|
|
61
62
|
msvc_base_path / version / "lib" / "x64" / "vcruntime.lib",
|
|
62
63
|
]
|
|
63
64
|
)
|
|
64
65
|
|
|
65
66
|
|
|
67
|
+
def find_msvc_env() -> tuple[Optional[Path], Optional[str]]:
|
|
68
|
+
msvc_base_path = os.getenv("VCINSTALLDIR")
|
|
69
|
+
if msvc_base_path is None:
|
|
70
|
+
return None, None
|
|
71
|
+
msvc_base_path = Path(msvc_base_path) / "Tools" / "MSVC"
|
|
72
|
+
|
|
73
|
+
version = os.getenv("VCToolsVersion")
|
|
74
|
+
if not check_msvc(msvc_base_path, version):
|
|
75
|
+
warnings.warn(
|
|
76
|
+
f"Environment variables VCINSTALLDIR = {os.getenv('VCINSTALLDIR')}, "
|
|
77
|
+
f"VCToolsVersion = {os.getenv('VCToolsVersion')} are set, "
|
|
78
|
+
"but this MSVC installation is incomplete."
|
|
79
|
+
)
|
|
80
|
+
return None, None
|
|
81
|
+
|
|
82
|
+
return msvc_base_path, version
|
|
83
|
+
|
|
84
|
+
|
|
66
85
|
def find_msvc_vswhere() -> tuple[Optional[Path], Optional[str]]:
|
|
67
86
|
vswhere_path = find_in_program_files(
|
|
68
87
|
r"Microsoft Visual Studio\Installer\vswhere.exe"
|
|
@@ -144,20 +163,28 @@ def find_msvc_hardcoded() -> tuple[Optional[Path], Optional[str]]:
|
|
|
144
163
|
return None, None
|
|
145
164
|
|
|
146
165
|
|
|
147
|
-
def find_msvc() -> tuple[list[str], list[str]]:
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
166
|
+
def find_msvc(env_only: bool) -> tuple[Optional[str], list[str], list[str]]:
|
|
167
|
+
if env_only:
|
|
168
|
+
fs = [find_msvc_env]
|
|
169
|
+
else:
|
|
170
|
+
fs = [
|
|
171
|
+
find_msvc_env,
|
|
172
|
+
find_msvc_vswhere,
|
|
173
|
+
find_msvc_envpath,
|
|
174
|
+
find_msvc_hardcoded,
|
|
175
|
+
]
|
|
176
|
+
for f in fs:
|
|
177
|
+
msvc_base_path, version = f()
|
|
178
|
+
if msvc_base_path:
|
|
179
|
+
return (
|
|
180
|
+
str(msvc_base_path / version / "bin" / "Hostx64" / "x64" / "cl.exe"),
|
|
181
|
+
[str(msvc_base_path / version / "include")],
|
|
182
|
+
[str(msvc_base_path / version / "lib" / "x64")],
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
if not env_only:
|
|
154
186
|
warnings.warn("Failed to find MSVC.")
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
return (
|
|
158
|
-
[str(msvc_base_path / version / "include")],
|
|
159
|
-
[str(msvc_base_path / version / "lib" / "x64")],
|
|
160
|
-
)
|
|
187
|
+
return None, [], []
|
|
161
188
|
|
|
162
189
|
|
|
163
190
|
def check_winsdk(winsdk_base_path: Path, version: str) -> bool:
|
|
@@ -170,6 +197,26 @@ def check_winsdk(winsdk_base_path: Path, version: str) -> bool:
|
|
|
170
197
|
)
|
|
171
198
|
|
|
172
199
|
|
|
200
|
+
def find_winsdk_env() -> tuple[Optional[Path], Optional[str]]:
|
|
201
|
+
winsdk_base_path = os.getenv("WindowsSdkDir")
|
|
202
|
+
if winsdk_base_path is None:
|
|
203
|
+
return None, None
|
|
204
|
+
winsdk_base_path = Path(winsdk_base_path)
|
|
205
|
+
|
|
206
|
+
version = os.getenv("WindowsSDKVersion")
|
|
207
|
+
if version:
|
|
208
|
+
version = version.rstrip("\\")
|
|
209
|
+
if not check_winsdk(winsdk_base_path, version):
|
|
210
|
+
warnings.warn(
|
|
211
|
+
f"Environment variables WindowsSdkDir = {os.getenv('WindowsSdkDir')}, "
|
|
212
|
+
f"WindowsSDKVersion = {os.getenv('WindowsSDKVersion')} are set, "
|
|
213
|
+
"but this Windows SDK installation is incomplete."
|
|
214
|
+
)
|
|
215
|
+
return None, None
|
|
216
|
+
|
|
217
|
+
return winsdk_base_path, version
|
|
218
|
+
|
|
219
|
+
|
|
173
220
|
def find_winsdk_registry() -> tuple[Optional[Path], Optional[str]]:
|
|
174
221
|
try:
|
|
175
222
|
reg = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
|
|
@@ -212,32 +259,46 @@ def find_winsdk_hardcoded() -> tuple[Optional[Path], Optional[str]]:
|
|
|
212
259
|
return winsdk_base_path, version
|
|
213
260
|
|
|
214
261
|
|
|
215
|
-
def find_winsdk() -> tuple[list[str], list[str]]:
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
262
|
+
def find_winsdk(env_only: bool) -> tuple[list[str], list[str]]:
|
|
263
|
+
if env_only:
|
|
264
|
+
fs = [find_winsdk_env]
|
|
265
|
+
else:
|
|
266
|
+
fs = [
|
|
267
|
+
find_winsdk_env,
|
|
268
|
+
find_winsdk_registry,
|
|
269
|
+
find_winsdk_hardcoded,
|
|
270
|
+
]
|
|
271
|
+
for f in fs:
|
|
272
|
+
winsdk_base_path, version = f()
|
|
273
|
+
if winsdk_base_path:
|
|
274
|
+
return (
|
|
275
|
+
[
|
|
276
|
+
str(winsdk_base_path / "Include" / version / "shared"),
|
|
277
|
+
str(winsdk_base_path / "Include" / version / "ucrt"),
|
|
278
|
+
str(winsdk_base_path / "Include" / version / "um"),
|
|
279
|
+
],
|
|
280
|
+
[
|
|
281
|
+
str(winsdk_base_path / "Lib" / version / "ucrt" / "x64"),
|
|
282
|
+
str(winsdk_base_path / "Lib" / version / "um" / "x64"),
|
|
283
|
+
],
|
|
284
|
+
)
|
|
285
|
+
|
|
286
|
+
if not env_only:
|
|
220
287
|
warnings.warn("Failed to find Windows SDK.")
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
return (
|
|
224
|
-
[
|
|
225
|
-
str(winsdk_base_path / "Include" / version / "shared"),
|
|
226
|
-
str(winsdk_base_path / "Include" / version / "ucrt"),
|
|
227
|
-
str(winsdk_base_path / "Include" / version / "um"),
|
|
228
|
-
],
|
|
229
|
-
[
|
|
230
|
-
str(winsdk_base_path / "Lib" / version / "ucrt" / "x64"),
|
|
231
|
-
str(winsdk_base_path / "Lib" / version / "um" / "x64"),
|
|
232
|
-
],
|
|
233
|
-
)
|
|
288
|
+
return [], []
|
|
234
289
|
|
|
235
290
|
|
|
236
291
|
@functools.cache
|
|
237
|
-
def find_msvc_winsdk(
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
292
|
+
def find_msvc_winsdk(
|
|
293
|
+
env_only: bool = False,
|
|
294
|
+
) -> tuple[Optional[str], list[str], list[str]]:
|
|
295
|
+
msvc_bin_path, msvc_inc_dirs, msvc_lib_dirs = find_msvc(env_only)
|
|
296
|
+
winsdk_inc_dirs, winsdk_lib_dirs = find_winsdk(env_only)
|
|
297
|
+
return (
|
|
298
|
+
msvc_bin_path,
|
|
299
|
+
msvc_inc_dirs + winsdk_inc_dirs,
|
|
300
|
+
msvc_lib_dirs + winsdk_lib_dirs,
|
|
301
|
+
)
|
|
241
302
|
|
|
242
303
|
|
|
243
304
|
@functools.cache
|
|
@@ -3,12 +3,12 @@ triton/_internal_testing.py,sha256=OBY28huiEWItqGgiukgZzHLLaSbS8yj9kdhn_u562Yg,5
|
|
|
3
3
|
triton/_utils.py,sha256=5RiCLwW14w0Q3mdZ-9yz-VO5KiSexNj9xeDt4gaNsvE,1014
|
|
4
4
|
triton/errors.py,sha256=8WfnuRKLG578mgY6cBA3ECruVMf9ULEKFNgRcJ6IhWM,89
|
|
5
5
|
triton/testing.py,sha256=ivFf1Fq9frmfVahaVUp0bgJxmvVZNACZfj3Sai6zfAs,20048
|
|
6
|
-
triton/windows_utils.py,sha256=
|
|
7
|
-
triton/_C/libtriton.pyd,sha256=
|
|
6
|
+
triton/windows_utils.py,sha256=YUl-1QbLINQRaAAMNYPjLiTFZlVsCNi9mTFck5aemwk,12778
|
|
7
|
+
triton/_C/libtriton.pyd,sha256=CHlRXTuChUtksf319rUsjMpt_uDJws61neyTqpBb2cU,87276032
|
|
8
8
|
triton/backends/__init__.py,sha256=opAo_vgEMt3tLO_bYFrYGksnIu0qohbmyuu_s3-rNAs,1595
|
|
9
9
|
triton/backends/compiler.py,sha256=ymaG0kpveAuESbQ9QZ0RyXjr0Aq4el_G5XGYogJ2gNA,3588
|
|
10
10
|
triton/backends/driver.py,sha256=AN60upJlPgia0JwvZ8vIVgLITNPuI0fdz8zMIIHPpF4,1450
|
|
11
|
-
triton/backends/amd/compiler.py,sha256=
|
|
11
|
+
triton/backends/amd/compiler.py,sha256=27jurEV7tH5J6BHtOXrdPJyCMYIPHo5G4Op_O66gv4E,19135
|
|
12
12
|
triton/backends/amd/driver.c,sha256=obiiiPndny5NyhUcJ8iyrVHrXU1ruLpLGd_LgaKQEbU,8459
|
|
13
13
|
triton/backends/amd/driver.py,sha256=p8FcTiAq-829p2gRQZ5sPE1_d1SQQ_sOhb5WxI6rXME,20379
|
|
14
14
|
triton/backends/amd/include/hip/channel_descriptor.h,sha256=gTYe7SzIg-m3ThOQY2vr5Rh6-uWvUP_d37v8F4T2Q14,1773
|
|
@@ -112,9 +112,9 @@ triton/backends/amd/lib/asanrtl.bc,sha256=1xv2RlU3WvbdsghHlmhwiHewGM2B5dKts5bERM
|
|
|
112
112
|
triton/backends/amd/lib/ockl.bc,sha256=wQKCzkKukIHbu0lyjKUYlhndc7S27xto6L54J0Bn-C0,246124
|
|
113
113
|
triton/backends/amd/lib/ocml.bc,sha256=UPNTXW0gCXUNB-c6orSYwb-mz9_mjUc7zny_vfFza44,205964
|
|
114
114
|
triton/backends/nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
|
-
triton/backends/nvidia/compiler.py,sha256=
|
|
115
|
+
triton/backends/nvidia/compiler.py,sha256=4BnLUapsoh8lsQPYlm-_ZCyKvU3DgJY3NgDMo8leV9c,19996
|
|
116
116
|
triton/backends/nvidia/driver.c,sha256=RWzdn9_RWaaVxXKtdrvxoRg4yR7WSH7ELRyjGDYKwBM,18880
|
|
117
|
-
triton/backends/nvidia/driver.py,sha256=
|
|
117
|
+
triton/backends/nvidia/driver.py,sha256=cbNUIsppHawGsqf3ctuqc0nRM_kbsMCsH1osWv0fKro,21005
|
|
118
118
|
triton/backends/nvidia/bin/ptxas.exe,sha256=iCva9hIYg-Q2NybchwaQJFkwDzNiliFOTDdZrHPLb6A,24732160
|
|
119
119
|
triton/backends/nvidia/include/cuda.h,sha256=Fn44OjeRImxegJ39apYUspseEfTWNGwpqSGUOnHj5WY,1183268
|
|
120
120
|
triton/backends/nvidia/lib/libdevice.10.bc,sha256=XC-uN8huaMOjhgWpX1EtfRLV89uYYxC-R_VzBKpype4,473728
|
|
@@ -141,9 +141,9 @@ triton/language/extra/hip/__init__.py,sha256=ieSER4LeX9_0horChGUUVwpuKAprkuka8uG
|
|
|
141
141
|
triton/language/extra/hip/libdevice.py,sha256=EVraUfeXzQmN3F5Lleg2mohVcbFWOWlLaAH1nkbqtV4,16841
|
|
142
142
|
triton/runtime/__init__.py,sha256=mKL5cqIBDUw2WO80NRCh4s1G8KYaqgM59TTAbTkPPjQ,621
|
|
143
143
|
triton/runtime/_allocation.py,sha256=zaW4B7I7c-2rkVuN7IZaUB6IQSI1t4FvnTPZH-r7DTk,798
|
|
144
|
-
triton/runtime/autotuner.py,sha256=
|
|
145
|
-
triton/runtime/build.py,sha256=
|
|
146
|
-
triton/runtime/cache.py,sha256=
|
|
144
|
+
triton/runtime/autotuner.py,sha256=0ku0wjPo8xOvom6P4uEVZHsgPkxAFOqP1LjUVue0HLM,17854
|
|
145
|
+
triton/runtime/build.py,sha256=1kRGMsDDg7NHdxKEPmNJ3SQaa_UnBckJF5KfFOwoBbI,3593
|
|
146
|
+
triton/runtime/cache.py,sha256=uoU1UH3HPxkxT0r-69HIZgLBls3T-TDxJedRULX9lbM,10583
|
|
147
147
|
triton/runtime/driver.py,sha256=VZ-883Xri71R72lHB6usIpLo3gGLbZJkAlLP3ewWSpc,1509
|
|
148
148
|
triton/runtime/errors.py,sha256=CwfJXciwel_-K3BfQfKUpLPDWrSyTnGsfJkqJojrdfQ,1052
|
|
149
149
|
triton/runtime/interpreter.py,sha256=RVzNGSXyLlg14LgjJ5BZ3qbqfK49FccG-pXaIXXXE2g,59331
|
|
@@ -245,9 +245,9 @@ triton/tools/disasm.py,sha256=BBO4bALdLcWgWDLhQdYHLlTx3oo8g_d8maeE_Uu-FmU,5088
|
|
|
245
245
|
triton/tools/experimental_descriptor.py,sha256=0Wqy96Cc6YLh9o0eTknW-Lfvha6lfRSfe8bswkcPHMs,1260
|
|
246
246
|
triton/tools/link.py,sha256=u7qtfZRLriZkAMEGNvj8YF-k1cthmLL7BwHYqBgT63E,11871
|
|
247
247
|
triton/tools/mxfp.py,sha256=YQdpBrGkOVNOtnLeRjMCeVFHWkSwUubGeWsItIjO8TU,11737
|
|
248
|
-
triton/tools/extra/cuda/compile.c,sha256=
|
|
248
|
+
triton/tools/extra/cuda/compile.c,sha256=TdIENsqk6wrvv1C4Mk-sq9keXe3SJuMQcf0UpxmjNZk,2153
|
|
249
249
|
triton/tools/extra/cuda/compile.h,sha256=n9QKIFZTL4RSsiXtAxBP9XGSnxjyaevQQ9bBpwDsvAg,332
|
|
250
|
-
triton_windows-3.3.0a0.
|
|
251
|
-
triton_windows-3.3.0a0.
|
|
252
|
-
triton_windows-3.3.0a0.
|
|
253
|
-
triton_windows-3.3.0a0.
|
|
250
|
+
triton_windows-3.3.0a0.post18.dist-info/METADATA,sha256=2h8i8geeJlfffh1CwstG76Xf4jcC2yuGasvloSjd43I,1629
|
|
251
|
+
triton_windows-3.3.0a0.post18.dist-info/WHEEL,sha256=_ZWIY2n7n6SpiuIFl1-RvcMp4Ty36T57FKf-7NzqZHM,101
|
|
252
|
+
triton_windows-3.3.0a0.post18.dist-info/top_level.txt,sha256=KhMzHYsArnZ3IkjAQ-xLnx1n_FjvEpJNelg2xPiDl-U,254
|
|
253
|
+
triton_windows-3.3.0a0.post18.dist-info/RECORD,,
|
{triton_windows-3.3.0a0.post16.dist-info → triton_windows-3.3.0a0.post18.dist-info}/top_level.txt
RENAMED
|
File without changes
|