triton-windows 3.3.0a0.post17__cp313-cp313-win_amd64.whl → 3.3.0.post19__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

triton/_C/libtriton.pyd CHANGED
Binary file
@@ -28,6 +28,10 @@ class HIPOptions:
28
28
  waves_per_eu: int = 1
29
29
  num_stages: int = 2
30
30
  num_ctas: int = 1
31
+ num_buffers_warp_spec: int = 0
32
+ num_consumer_groups: int = 0
33
+ reg_dec_producer: int = 0
34
+ reg_inc_consumer: int = 0
31
35
  extern_libs: dict = None
32
36
  cluster_dims: tuple = (1, 1, 1)
33
37
  debug: bool = False
@@ -141,6 +141,10 @@ class CUDAOptions:
141
141
  num_warps: int = 4
142
142
  num_ctas: int = 1
143
143
  num_stages: int = 3
144
+ num_buffers_warp_spec: int = 0
145
+ num_consumer_groups: int = 0
146
+ reg_dec_producer: int = 0
147
+ reg_inc_consumer: int = 0
144
148
  # maxnreg corresponds to the ptx parameter .maxnreg, which controls the
145
149
  # maximum number of 32-bit registers used by one thread.
146
150
  maxnreg: Optional[int] = None
@@ -285,16 +289,29 @@ class CUDABackend(BaseBackend):
285
289
  passes.ttgpuir.add_optimize_accumulator_init(pm)
286
290
  passes.common.add_canonicalizer(pm)
287
291
  passes.ttgpuir.add_combine_tensor_select_and_if(pm)
292
+ passes.ttgpuir.add_ws_task_partition(pm, opt.num_consumer_groups)
293
+ passes.ttgpuir.add_taskid_propagate(pm, opt.num_consumer_groups)
294
+ passes.ttgpuir.add_ws_data_partition(pm, opt.num_consumer_groups)
295
+ passes.ttgpuir.add_ws_code_partition(pm, opt.num_buffers_warp_spec, opt.num_consumer_groups,
296
+ opt.reg_dec_producer, opt.reg_inc_consumer)
288
297
  passes.ttgpuir.add_pipeline(pm, opt.num_stages, dump_enabled)
298
+ passes.ttgpuir.add_ping_pong_sync(pm, opt.num_consumer_groups)
299
+ passes.ttgpuir.add_ws_lowering(pm, opt.num_consumer_groups)
289
300
  elif capability // 10 >= 10:
290
301
  passes.ttgpuir.add_fuse_nested_loops(pm)
291
302
  passes.common.add_canonicalizer(pm)
292
303
  passes.common.add_licm(pm)
293
304
  passes.ttgpuir.add_optimize_accumulator_init(pm)
305
+ passes.ttgpuir.add_ws_task_partition(pm, opt.num_consumer_groups)
306
+ passes.ttgpuir.add_taskid_propagate(pm, opt.num_consumer_groups)
307
+ passes.ttgpuir.add_ws_data_partition(pm, opt.num_consumer_groups)
308
+ passes.ttgpuir.add_ws_code_partition(pm, opt.num_buffers_warp_spec, opt.num_consumer_groups,
309
+ opt.reg_dec_producer, opt.reg_inc_consumer)
294
310
  passes.ttgpuir.add_pipeline(pm, opt.num_stages, dump_enabled)
295
311
  passes.ttgpuir.add_combine_tensor_select_and_if(pm)
296
312
  nvidia.passes.ttnvgpuir.add_promote_lhs_to_tmem(pm)
297
313
  nvidia.passes.ttnvgpuir.add_keep_acc_in_tmem(pm)
314
+ passes.ttgpuir.add_ws_lowering(pm, opt.num_consumer_groups)
298
315
  passes.common.add_canonicalizer(pm)
299
316
  else:
300
317
  passes.common.add_licm(pm)
@@ -310,6 +327,8 @@ class CUDABackend(BaseBackend):
310
327
  nvidia.passes.ttnvgpuir.add_fence_insertion(pm)
311
328
  nvidia.passes.ttnvgpuir.add_tma_lowering(pm)
312
329
  passes.common.add_canonicalizer(pm)
330
+ if capability // 10 >= 9:
331
+ passes.ttgpuir.add_ws_canonicalization(pm, opt.num_consumer_groups)
313
332
  pm.run(mod)
314
333
  metadata["cluster_dims"] = (cluster_info.clusterDimX, cluster_info.clusterDimY, cluster_info.clusterDimZ)
315
334
  return mod
@@ -416,7 +435,9 @@ class CUDABackend(BaseBackend):
416
435
  opt_level = ['--opt-level', '0'] if os.environ.get("DISABLE_PTXAS_OPT", "0") == "1" else []
417
436
  ptxas_cmd = [ptxas, *line_info, *fmad, '-v', *opt_level, f'--gpu-name={arch}', fsrc.name, '-o', fbin]
418
437
  try:
419
- subprocess.run(ptxas_cmd, check=True, close_fds=True, stdout=flog, stderr=flog)
438
+ # close_fds=True on Windows and False on Linux, see https://github.com/triton-lang/triton/pull/4357
439
+ # On Windows, both stdout and stderr need to be redirected to flog
440
+ subprocess.run(ptxas_cmd, check=True, close_fds=True if os.name == 'nt' else False, stdout=flog, stderr=flog)
420
441
  except subprocess.CalledProcessError as e:
421
442
  with open(flog.name) as log_file:
422
443
  log = log_file.read()
@@ -1,3 +1,4 @@
1
+ #define _CRT_SECURE_NO_WARNINGS
1
2
  #include "cuda.h"
2
3
 
3
4
  #ifndef _WIN32
@@ -74,6 +74,10 @@ def compile_module_from_src(src, name):
74
74
  so = _build(name, src_path, tmpdir, library_dirs(), include_dir, libraries)
75
75
  with open(so, "rb") as f:
76
76
  cache_path = cache.put(f.read(), f"{name}.{ext}", binary=True)
77
+
78
+ # Loading module with relative path may cause error
79
+ cache_path = os.path.abspath(cache_path)
80
+
77
81
  import importlib.util
78
82
  spec = importlib.util.spec_from_file_location(name, cache_path)
79
83
  mod = importlib.util.module_from_spec(spec)
@@ -204,6 +208,7 @@ def make_launcher(constants, signature):
204
208
  params = [f"&arg{i}" for i, ty in signature.items() if ty != "constexpr"]
205
209
  params.append("&global_scratch")
206
210
  src = f"""
211
+ #define _CRT_SECURE_NO_WARNINGS
207
212
  #include \"cuda.h\"
208
213
  #include <stdbool.h>
209
214
  #define PY_SSIZE_T_CLEAN
@@ -36,7 +36,10 @@ class Autotuner(KernelInterface):
36
36
  'prune_num_stages_by'(optional): a function used to prune num_stages. It takes configs:List[Config] as its input, and returns pruned configs.
37
37
  """
38
38
  if not configs:
39
- self.configs = [Config({}, num_warps=4, num_stages=3, num_ctas=1)]
39
+ self.configs = [
40
+ Config({}, num_warps=4, num_stages=3, num_ctas=1, num_buffers_warp_spec=0, num_consumer_groups=0,
41
+ reg_dec_producer=0, reg_inc_consumer=0)
42
+ ]
40
43
  else:
41
44
  self.configs = configs
42
45
  self.keys = key
@@ -269,11 +272,16 @@ class Config:
269
272
  function are args.
270
273
  """
271
274
 
272
- def __init__(self, kwargs, num_warps=4, num_stages=3, num_ctas=1, maxnreg=None, pre_hook=None):
275
+ def __init__(self, kwargs, num_warps=4, num_stages=3, num_ctas=1, num_buffers_warp_spec=0, num_consumer_groups=0,
276
+ reg_dec_producer=0, reg_inc_consumer=0, maxnreg=None, pre_hook=None):
273
277
  self.kwargs = kwargs
274
278
  self.num_warps = num_warps
275
279
  self.num_ctas = num_ctas
276
280
  self.num_stages = num_stages
281
+ self.num_buffers_warp_spec = num_buffers_warp_spec
282
+ self.num_consumer_groups = num_consumer_groups
283
+ self.reg_dec_producer = reg_dec_producer
284
+ self.reg_inc_consumer = reg_inc_consumer
277
285
  self.maxnreg = maxnreg
278
286
  self.pre_hook = pre_hook
279
287
 
@@ -285,6 +293,10 @@ class Config:
285
293
  ("num_warps", self.num_warps),
286
294
  ("num_ctas", self.num_ctas),
287
295
  ("num_stages", self.num_stages),
296
+ ("num_buffers_warp_spec", self.num_buffers_warp_spec),
297
+ ("num_consumer_groups", self.num_consumer_groups),
298
+ ("reg_dec_producer", self.reg_dec_producer),
299
+ ("reg_inc_consumer", self.reg_inc_consumer),
288
300
  ("maxnreg", self.maxnreg),
289
301
  ) if v is not None
290
302
  }
@@ -297,6 +309,10 @@ class Config:
297
309
  res.append(f"num_warps: {self.num_warps}")
298
310
  res.append(f"num_ctas: {self.num_ctas}")
299
311
  res.append(f"num_stages: {self.num_stages}")
312
+ res.append(f"num_buffers_warp_spec: {self.num_buffers_warp_spec}")
313
+ res.append(f"num_consumer_groups: {self.num_consumer_groups}")
314
+ res.append(f"reg_dec_producer: {self.reg_dec_producer}")
315
+ res.append(f"reg_inc_consumer: {self.reg_inc_consumer}")
300
316
  res.append(f"maxnreg: {self.maxnreg}")
301
317
  return ", ".join(res)
302
318
 
triton/runtime/build.py CHANGED
@@ -1,3 +1,4 @@
1
+ import functools
1
2
  import sysconfig
2
3
  import os
3
4
  import shutil
@@ -7,8 +8,12 @@ if os.name == "nt":
7
8
  from triton.windows_utils import find_msvc_winsdk, find_python
8
9
 
9
10
 
11
+ @functools.cache
10
12
  def get_cc():
11
13
  cc = os.environ.get("CC")
14
+ if cc is None:
15
+ # Find and check MSVC and Windows SDK from environment variables set by Launch-VsDevShell.ps1 or VsDevCmd.bat
16
+ cc, _, _ = find_msvc_winsdk(env_only=True)
12
17
  if cc is None:
13
18
  # Bundled TinyCC
14
19
  cc = os.path.join(sysconfig.get_paths()["platlib"], "triton", "runtime", "tcc", "tcc.exe")
@@ -30,6 +35,11 @@ def is_msvc(cc):
30
35
  return cc == "cl" or cc == "cl.exe"
31
36
 
32
37
 
38
+ def is_clang(cc):
39
+ cc = os.path.basename(cc).lower()
40
+ return cc == "clang" or cc == "clang.exe"
41
+
42
+
33
43
  def _cc_cmd(cc, src, out, include_dirs, library_dirs, libraries):
34
44
  if is_msvc(cc):
35
45
  out_base = os.path.splitext(out)[0]
@@ -44,7 +54,10 @@ def _cc_cmd(cc, src, out, include_dirs, library_dirs, libraries):
44
54
  cc_cmd += [f"/PDB:{out_base + '.pdb'}"]
45
55
  else:
46
56
  # for -Wno-psabi, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111047
47
- cc_cmd = [cc, src, "-O3", "-shared", "-fPIC", "-Wno-psabi", "-o", out]
57
+ cc_cmd = [cc, src, "-O3", "-shared", "-Wno-psabi", "-o", out]
58
+ if not (os.name == "nt" and is_clang(cc)):
59
+ # Clang does not support -fPIC on Windows
60
+ cc_cmd += ["-fPIC"]
48
61
  cc_cmd += [f'-l{lib}' for lib in libraries]
49
62
  cc_cmd += [f"-L{dir}" for dir in library_dirs]
50
63
  cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None]
@@ -75,9 +88,15 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
75
88
  if "python3" not in libraries:
76
89
  libraries += ["python3"]
77
90
  if is_msvc(cc):
78
- msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
91
+ _, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
79
92
  include_dirs += msvc_winsdk_inc_dirs
80
93
  library_dirs += msvc_winsdk_lib_dirs
81
94
  cc_cmd = _cc_cmd(cc, src, so, include_dirs, library_dirs, libraries)
82
- subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL)
95
+
96
+ try:
97
+ ret = subprocess.check_call(cc_cmd)
98
+ except Exception as e:
99
+ print("Failed to compile. cc_cmd:", cc_cmd)
100
+ raise e
101
+
83
102
  return so
@@ -65,4 +65,5 @@ CUresult {kernel_name}(CUstream stream, {signature}) {{
65
65
  // TODO: shared memory
66
66
  if(gX * gY * gZ > 0)
67
67
  return cuLaunchKernel({kernel_name}_func, gX, gY, gZ, {num_warps} * 32, 1, 1, {shared}, stream, args, NULL);
68
+ return (CUresult)NULL;
68
69
  }}
triton/windows_utils.py CHANGED
@@ -57,12 +57,31 @@ def check_msvc(msvc_base_path: Path, version: str) -> bool:
57
57
  return all(
58
58
  x.exists()
59
59
  for x in [
60
+ msvc_base_path / version / "bin" / "Hostx64" / "x64" / "cl.exe",
60
61
  msvc_base_path / version / "include" / "vcruntime.h",
61
62
  msvc_base_path / version / "lib" / "x64" / "vcruntime.lib",
62
63
  ]
63
64
  )
64
65
 
65
66
 
67
+ def find_msvc_env() -> tuple[Optional[Path], Optional[str]]:
68
+ msvc_base_path = os.getenv("VCINSTALLDIR")
69
+ if msvc_base_path is None:
70
+ return None, None
71
+ msvc_base_path = Path(msvc_base_path) / "Tools" / "MSVC"
72
+
73
+ version = os.getenv("VCToolsVersion")
74
+ if not check_msvc(msvc_base_path, version):
75
+ warnings.warn(
76
+ f"Environment variables VCINSTALLDIR = {os.getenv('VCINSTALLDIR')}, "
77
+ f"VCToolsVersion = {os.getenv('VCToolsVersion')} are set, "
78
+ "but this MSVC installation is incomplete."
79
+ )
80
+ return None, None
81
+
82
+ return msvc_base_path, version
83
+
84
+
66
85
  def find_msvc_vswhere() -> tuple[Optional[Path], Optional[str]]:
67
86
  vswhere_path = find_in_program_files(
68
87
  r"Microsoft Visual Studio\Installer\vswhere.exe"
@@ -144,20 +163,28 @@ def find_msvc_hardcoded() -> tuple[Optional[Path], Optional[str]]:
144
163
  return None, None
145
164
 
146
165
 
147
- def find_msvc() -> tuple[list[str], list[str]]:
148
- msvc_base_path, version = find_msvc_vswhere()
149
- if msvc_base_path is None:
150
- msvc_base_path, version = find_msvc_envpath()
151
- if msvc_base_path is None:
152
- msvc_base_path, version = find_msvc_hardcoded()
153
- if msvc_base_path is None:
166
+ def find_msvc(env_only: bool) -> tuple[Optional[str], list[str], list[str]]:
167
+ if env_only:
168
+ fs = [find_msvc_env]
169
+ else:
170
+ fs = [
171
+ find_msvc_env,
172
+ find_msvc_vswhere,
173
+ find_msvc_envpath,
174
+ find_msvc_hardcoded,
175
+ ]
176
+ for f in fs:
177
+ msvc_base_path, version = f()
178
+ if msvc_base_path:
179
+ return (
180
+ str(msvc_base_path / version / "bin" / "Hostx64" / "x64" / "cl.exe"),
181
+ [str(msvc_base_path / version / "include")],
182
+ [str(msvc_base_path / version / "lib" / "x64")],
183
+ )
184
+
185
+ if not env_only:
154
186
  warnings.warn("Failed to find MSVC.")
155
- return [], []
156
-
157
- return (
158
- [str(msvc_base_path / version / "include")],
159
- [str(msvc_base_path / version / "lib" / "x64")],
160
- )
187
+ return None, [], []
161
188
 
162
189
 
163
190
  def check_winsdk(winsdk_base_path: Path, version: str) -> bool:
@@ -170,6 +197,26 @@ def check_winsdk(winsdk_base_path: Path, version: str) -> bool:
170
197
  )
171
198
 
172
199
 
200
+ def find_winsdk_env() -> tuple[Optional[Path], Optional[str]]:
201
+ winsdk_base_path = os.getenv("WindowsSdkDir")
202
+ if winsdk_base_path is None:
203
+ return None, None
204
+ winsdk_base_path = Path(winsdk_base_path)
205
+
206
+ version = os.getenv("WindowsSDKVersion")
207
+ if version:
208
+ version = version.rstrip("\\")
209
+ if not check_winsdk(winsdk_base_path, version):
210
+ warnings.warn(
211
+ f"Environment variables WindowsSdkDir = {os.getenv('WindowsSdkDir')}, "
212
+ f"WindowsSDKVersion = {os.getenv('WindowsSDKVersion')} are set, "
213
+ "but this Windows SDK installation is incomplete."
214
+ )
215
+ return None, None
216
+
217
+ return winsdk_base_path, version
218
+
219
+
173
220
  def find_winsdk_registry() -> tuple[Optional[Path], Optional[str]]:
174
221
  try:
175
222
  reg = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
@@ -212,32 +259,46 @@ def find_winsdk_hardcoded() -> tuple[Optional[Path], Optional[str]]:
212
259
  return winsdk_base_path, version
213
260
 
214
261
 
215
- def find_winsdk() -> tuple[list[str], list[str]]:
216
- winsdk_base_path, version = find_winsdk_registry()
217
- if winsdk_base_path is None:
218
- winsdk_base_path, version = find_winsdk_hardcoded()
219
- if winsdk_base_path is None:
262
+ def find_winsdk(env_only: bool) -> tuple[list[str], list[str]]:
263
+ if env_only:
264
+ fs = [find_winsdk_env]
265
+ else:
266
+ fs = [
267
+ find_winsdk_env,
268
+ find_winsdk_registry,
269
+ find_winsdk_hardcoded,
270
+ ]
271
+ for f in fs:
272
+ winsdk_base_path, version = f()
273
+ if winsdk_base_path:
274
+ return (
275
+ [
276
+ str(winsdk_base_path / "Include" / version / "shared"),
277
+ str(winsdk_base_path / "Include" / version / "ucrt"),
278
+ str(winsdk_base_path / "Include" / version / "um"),
279
+ ],
280
+ [
281
+ str(winsdk_base_path / "Lib" / version / "ucrt" / "x64"),
282
+ str(winsdk_base_path / "Lib" / version / "um" / "x64"),
283
+ ],
284
+ )
285
+
286
+ if not env_only:
220
287
  warnings.warn("Failed to find Windows SDK.")
221
- return [], []
222
-
223
- return (
224
- [
225
- str(winsdk_base_path / "Include" / version / "shared"),
226
- str(winsdk_base_path / "Include" / version / "ucrt"),
227
- str(winsdk_base_path / "Include" / version / "um"),
228
- ],
229
- [
230
- str(winsdk_base_path / "Lib" / version / "ucrt" / "x64"),
231
- str(winsdk_base_path / "Lib" / version / "um" / "x64"),
232
- ],
233
- )
288
+ return [], []
234
289
 
235
290
 
236
291
  @functools.cache
237
- def find_msvc_winsdk() -> tuple[list[str], list[str]]:
238
- msvc_inc_dirs, msvc_lib_dirs = find_msvc()
239
- winsdk_inc_dirs, winsdk_lib_dirs = find_winsdk()
240
- return msvc_inc_dirs + winsdk_inc_dirs, msvc_lib_dirs + winsdk_lib_dirs
292
+ def find_msvc_winsdk(
293
+ env_only: bool = False,
294
+ ) -> tuple[Optional[str], list[str], list[str]]:
295
+ msvc_bin_path, msvc_inc_dirs, msvc_lib_dirs = find_msvc(env_only)
296
+ winsdk_inc_dirs, winsdk_lib_dirs = find_winsdk(env_only)
297
+ return (
298
+ msvc_bin_path,
299
+ msvc_inc_dirs + winsdk_inc_dirs,
300
+ msvc_lib_dirs + winsdk_lib_dirs,
301
+ )
241
302
 
242
303
 
243
304
  @functools.cache
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: triton-windows
3
- Version: 3.3.0a0.post17
3
+ Version: 3.3.0.post19
4
4
  Summary: A language and compiler for custom Deep Learning operations
5
5
  Home-page: https://github.com/woct0rdho/triton-windows
6
6
  Author: Philippe Tillet, Dian Wu
@@ -3,12 +3,12 @@ triton/_internal_testing.py,sha256=OBY28huiEWItqGgiukgZzHLLaSbS8yj9kdhn_u562Yg,5
3
3
  triton/_utils.py,sha256=5RiCLwW14w0Q3mdZ-9yz-VO5KiSexNj9xeDt4gaNsvE,1014
4
4
  triton/errors.py,sha256=8WfnuRKLG578mgY6cBA3ECruVMf9ULEKFNgRcJ6IhWM,89
5
5
  triton/testing.py,sha256=ivFf1Fq9frmfVahaVUp0bgJxmvVZNACZfj3Sai6zfAs,20048
6
- triton/windows_utils.py,sha256=aQMItmuZNXaki8zSB7HMvhy1RsXlmz1GcO-lORm1IIk,10852
7
- triton/_C/libtriton.pyd,sha256=tpuXfEgiXtK7rI2bLifCyKMkSp8Vt9xwi7e1fwiRIG0,86837248
6
+ triton/windows_utils.py,sha256=YUl-1QbLINQRaAAMNYPjLiTFZlVsCNi9mTFck5aemwk,12778
7
+ triton/_C/libtriton.pyd,sha256=fBQdubPERBR6FRKjYoRwK9ZGv_XRr6EMGBhHDll0e1M,87287808
8
8
  triton/backends/__init__.py,sha256=opAo_vgEMt3tLO_bYFrYGksnIu0qohbmyuu_s3-rNAs,1595
9
9
  triton/backends/compiler.py,sha256=ymaG0kpveAuESbQ9QZ0RyXjr0Aq4el_G5XGYogJ2gNA,3588
10
10
  triton/backends/driver.py,sha256=AN60upJlPgia0JwvZ8vIVgLITNPuI0fdz8zMIIHPpF4,1450
11
- triton/backends/amd/compiler.py,sha256=7Ej6QPi85Jwdx7E27d7A--a00OipDkpgiL3JNnCwoTo,19007
11
+ triton/backends/amd/compiler.py,sha256=27jurEV7tH5J6BHtOXrdPJyCMYIPHo5G4Op_O66gv4E,19135
12
12
  triton/backends/amd/driver.c,sha256=obiiiPndny5NyhUcJ8iyrVHrXU1ruLpLGd_LgaKQEbU,8459
13
13
  triton/backends/amd/driver.py,sha256=p8FcTiAq-829p2gRQZ5sPE1_d1SQQ_sOhb5WxI6rXME,20379
14
14
  triton/backends/amd/include/hip/channel_descriptor.h,sha256=gTYe7SzIg-m3ThOQY2vr5Rh6-uWvUP_d37v8F4T2Q14,1773
@@ -112,9 +112,9 @@ triton/backends/amd/lib/asanrtl.bc,sha256=1xv2RlU3WvbdsghHlmhwiHewGM2B5dKts5bERM
112
112
  triton/backends/amd/lib/ockl.bc,sha256=wQKCzkKukIHbu0lyjKUYlhndc7S27xto6L54J0Bn-C0,246124
113
113
  triton/backends/amd/lib/ocml.bc,sha256=UPNTXW0gCXUNB-c6orSYwb-mz9_mjUc7zny_vfFza44,205964
114
114
  triton/backends/nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
- triton/backends/nvidia/compiler.py,sha256=iHbIl-HXSgD3S5eJIXbQhr-Cyk9Gq_g8RXDFZxAGUF4,18444
116
- triton/backends/nvidia/driver.c,sha256=RWzdn9_RWaaVxXKtdrvxoRg4yR7WSH7ELRyjGDYKwBM,18880
117
- triton/backends/nvidia/driver.py,sha256=BcEzr0_ONM8H_DKoCbNuO1jRpBM_XHKBgt_Y2bG9aZw,20902
115
+ triton/backends/nvidia/compiler.py,sha256=4BnLUapsoh8lsQPYlm-_ZCyKvU3DgJY3NgDMo8leV9c,19996
116
+ triton/backends/nvidia/driver.c,sha256=HqtiJDPRxVav9pHl_swBja28RasjHZ7oFOpWKMT627c,18912
117
+ triton/backends/nvidia/driver.py,sha256=kMMVl4SMbJc6obI2Brxoj7RNJt6YBGKYSB56dPeUY6w,21037
118
118
  triton/backends/nvidia/bin/ptxas.exe,sha256=iCva9hIYg-Q2NybchwaQJFkwDzNiliFOTDdZrHPLb6A,24732160
119
119
  triton/backends/nvidia/include/cuda.h,sha256=Fn44OjeRImxegJ39apYUspseEfTWNGwpqSGUOnHj5WY,1183268
120
120
  triton/backends/nvidia/lib/libdevice.10.bc,sha256=XC-uN8huaMOjhgWpX1EtfRLV89uYYxC-R_VzBKpype4,473728
@@ -141,8 +141,8 @@ triton/language/extra/hip/__init__.py,sha256=ieSER4LeX9_0horChGUUVwpuKAprkuka8uG
141
141
  triton/language/extra/hip/libdevice.py,sha256=EVraUfeXzQmN3F5Lleg2mohVcbFWOWlLaAH1nkbqtV4,16841
142
142
  triton/runtime/__init__.py,sha256=mKL5cqIBDUw2WO80NRCh4s1G8KYaqgM59TTAbTkPPjQ,621
143
143
  triton/runtime/_allocation.py,sha256=zaW4B7I7c-2rkVuN7IZaUB6IQSI1t4FvnTPZH-r7DTk,798
144
- triton/runtime/autotuner.py,sha256=h1I6TDPPYtMvuwOkeprcSD3VKKGXk952jXN64oPIFXs,16844
145
- triton/runtime/build.py,sha256=6Ie4gn4-Zk1DIW3o-Nd002IhtEm_yikXh9o6tUm6iao,3279
144
+ triton/runtime/autotuner.py,sha256=0ku0wjPo8xOvom6P4uEVZHsgPkxAFOqP1LjUVue0HLM,17854
145
+ triton/runtime/build.py,sha256=6akPf7jgSRDWqcDGvJXxxiO0MzH6503YrM7xmr24l-Y,3825
146
146
  triton/runtime/cache.py,sha256=uoU1UH3HPxkxT0r-69HIZgLBls3T-TDxJedRULX9lbM,10583
147
147
  triton/runtime/driver.py,sha256=VZ-883Xri71R72lHB6usIpLo3gGLbZJkAlLP3ewWSpc,1509
148
148
  triton/runtime/errors.py,sha256=CwfJXciwel_-K3BfQfKUpLPDWrSyTnGsfJkqJojrdfQ,1052
@@ -245,9 +245,9 @@ triton/tools/disasm.py,sha256=BBO4bALdLcWgWDLhQdYHLlTx3oo8g_d8maeE_Uu-FmU,5088
245
245
  triton/tools/experimental_descriptor.py,sha256=0Wqy96Cc6YLh9o0eTknW-Lfvha6lfRSfe8bswkcPHMs,1260
246
246
  triton/tools/link.py,sha256=u7qtfZRLriZkAMEGNvj8YF-k1cthmLL7BwHYqBgT63E,11871
247
247
  triton/tools/mxfp.py,sha256=YQdpBrGkOVNOtnLeRjMCeVFHWkSwUubGeWsItIjO8TU,11737
248
- triton/tools/extra/cuda/compile.c,sha256=Me7beHPc6WNTwjg85H84DUMCRu4KJdVK2hNNgvlhBZ4,2126
248
+ triton/tools/extra/cuda/compile.c,sha256=TdIENsqk6wrvv1C4Mk-sq9keXe3SJuMQcf0UpxmjNZk,2153
249
249
  triton/tools/extra/cuda/compile.h,sha256=n9QKIFZTL4RSsiXtAxBP9XGSnxjyaevQQ9bBpwDsvAg,332
250
- triton_windows-3.3.0a0.post17.dist-info/METADATA,sha256=Hj5t_K8_B7fKw2Tp4ki_-rOSVDJ4TWFlJMJ8S_tqShc,1629
251
- triton_windows-3.3.0a0.post17.dist-info/WHEEL,sha256=L_iEQrKQ0abSxZWLpYPlBsDYbELTe7Z2WP5jxUcelf8,101
252
- triton_windows-3.3.0a0.post17.dist-info/top_level.txt,sha256=KhMzHYsArnZ3IkjAQ-xLnx1n_FjvEpJNelg2xPiDl-U,254
253
- triton_windows-3.3.0a0.post17.dist-info/RECORD,,
250
+ triton_windows-3.3.0.post19.dist-info/METADATA,sha256=n16mof1EMhLJdnP2tX85LoRgmCIpznA9Bx1IJBS-O2w,1627
251
+ triton_windows-3.3.0.post19.dist-info/WHEEL,sha256=JI6TipV6L5PIs92M_MEmWFBmpkHoCQOVHMpLZsFLd2o,101
252
+ triton_windows-3.3.0.post19.dist-info/top_level.txt,sha256=KhMzHYsArnZ3IkjAQ-xLnx1n_FjvEpJNelg2xPiDl-U,254
253
+ triton_windows-3.3.0.post19.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (77.0.1)
2
+ Generator: setuptools (79.0.0)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp313-cp313-win_amd64
5
5