tilelang-rocm 0.1.4.post10__cp310-cp310-manylinux1_x86_64.whl → 0.1.4.post12__cp310-cp310-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tilelang/VERSION +1 -1
- tilelang/autotuner/__init__.py +56 -6
- tilelang/engine/phase.py +8 -7
- tilelang/intrinsics/mfma_macro_generator.py +11 -3
- tilelang/jit/adapter/cython/adapter.py +2 -2
- tilelang/jit/adapter/cython/cython_wrapper.pyx +25 -24
- tilelang/lib/libtilelang.so +0 -0
- tilelang/lib/libtilelang_module.so +0 -0
- tilelang/lib/libtvm.so +0 -0
- tilelang/lib/libtvm_runtime.so +0 -0
- tilelang/src/tl_templates/hip/common.h +13 -0
- tilelang/src/tl_templates/hip/hip_fp8.h +30 -0
- {tilelang_rocm-0.1.4.post10.dist-info → tilelang_rocm-0.1.4.post12.dist-info}/METADATA +1 -1
- {tilelang_rocm-0.1.4.post10.dist-info → tilelang_rocm-0.1.4.post12.dist-info}/RECORD +17 -17
- {tilelang_rocm-0.1.4.post10.dist-info → tilelang_rocm-0.1.4.post12.dist-info}/WHEEL +0 -0
- {tilelang_rocm-0.1.4.post10.dist-info → tilelang_rocm-0.1.4.post12.dist-info}/licenses/LICENSE +0 -0
- {tilelang_rocm-0.1.4.post10.dist-info → tilelang_rocm-0.1.4.post12.dist-info}/top_level.txt +0 -0
tilelang/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.4.
|
1
|
+
0.1.4.post12
|
tilelang/autotuner/__init__.py
CHANGED
@@ -167,7 +167,7 @@ class AutoTuner:
|
|
167
167
|
max_mismatched_ratio: float = 0.01,
|
168
168
|
skip_check: bool = False,
|
169
169
|
manual_check_prog: Callable = None,
|
170
|
-
cache_input_tensors: bool =
|
170
|
+
cache_input_tensors: bool = False):
|
171
171
|
"""Set profiling arguments for the auto-tuner.
|
172
172
|
|
173
173
|
Args:
|
@@ -463,8 +463,26 @@ class _AutoTunerImplementation:
|
|
463
463
|
rep: int = 100
|
464
464
|
timeout: int = 100
|
465
465
|
configs: Any = None
|
466
|
-
|
467
|
-
|
466
|
+
supply_type: tilelang.TensorSupplyType = tilelang.TensorSupplyType.Auto
|
467
|
+
ref_prog: Callable = None
|
468
|
+
supply_prog: Callable = None
|
469
|
+
rtol: float = 1e-2
|
470
|
+
atol: float = 1e-2
|
471
|
+
max_mismatched_ratio: float = 0.01
|
472
|
+
skip_check: bool = False
|
473
|
+
manual_check_prog: Callable = None
|
474
|
+
cache_input_tensors: bool = False
|
475
|
+
|
476
|
+
def __init__(self, configs: Any, warmup: int = 25, rep: int = 100, timeout: int = 100,
|
477
|
+
supply_type: tilelang.TensorSupplyType = tilelang.TensorSupplyType.Auto,
|
478
|
+
ref_prog: Callable = None,
|
479
|
+
supply_prog: Callable = None,
|
480
|
+
rtol: float = 1e-2,
|
481
|
+
atol: float = 1e-2,
|
482
|
+
max_mismatched_ratio: float = 0.01,
|
483
|
+
skip_check: bool = False,
|
484
|
+
manual_check_prog: Callable = None,
|
485
|
+
cache_input_tensors: bool = False) -> None:
|
468
486
|
"""Initialize the AutoTunerImplementation.
|
469
487
|
|
470
488
|
Args:
|
@@ -509,7 +527,17 @@ class _AutoTunerImplementation:
|
|
509
527
|
def jit_compile(**config_arg):
|
510
528
|
return fn(*args, **kwargs, __tune_params=config_arg)
|
511
529
|
|
512
|
-
autotuner = AutoTuner(fn, configs=configs)
|
530
|
+
autotuner = AutoTuner(fn, configs=configs).set_profile_args(
|
531
|
+
supply_type=self.supply_type,
|
532
|
+
ref_prog=self.ref_prog,
|
533
|
+
supply_prog=self.supply_prog,
|
534
|
+
rtol=self.rtol,
|
535
|
+
atol=self.atol,
|
536
|
+
max_mismatched_ratio=self.max_mismatched_ratio,
|
537
|
+
skip_check=self.skip_check,
|
538
|
+
manual_check_prog=self.manual_check_prog,
|
539
|
+
cache_input_tensors=self.cache_input_tensors,
|
540
|
+
)
|
513
541
|
autotuner.jit_compile = jit_compile
|
514
542
|
autotuner.run = partial(autotuner.run, warmup, rep, timeout)
|
515
543
|
|
@@ -525,9 +553,21 @@ def autotune( # This is the new public interface
|
|
525
553
|
func: Union[Callable[_P, _RProg], PrimFunc, None] = None,
|
526
554
|
*, # Indicates subsequent arguments are keyword-only
|
527
555
|
configs: Any,
|
556
|
+
# profile arguments
|
528
557
|
warmup: int = 25,
|
529
558
|
rep: int = 100,
|
530
|
-
timeout: int = 100
|
559
|
+
timeout: int = 100,
|
560
|
+
# compile arguments
|
561
|
+
supply_type: tilelang.TensorSupplyType = tilelang.TensorSupplyType.Auto,
|
562
|
+
ref_prog: Callable = None,
|
563
|
+
supply_prog: Callable = None,
|
564
|
+
rtol: float = 1e-2,
|
565
|
+
atol: float = 1e-2,
|
566
|
+
max_mismatched_ratio: float = 0.01,
|
567
|
+
skip_check: bool = False,
|
568
|
+
manual_check_prog: Callable = None,
|
569
|
+
cache_input_tensors: bool = False,
|
570
|
+
):
|
531
571
|
"""
|
532
572
|
Just-In-Time (JIT) compiler decorator for TileLang functions.
|
533
573
|
|
@@ -571,5 +611,15 @@ def autotune( # This is the new public interface
|
|
571
611
|
# Create a _AutoTunerImplementation instance with the provided/defaulted arguments.
|
572
612
|
# This instance is a decorator that will be applied to the function later.
|
573
613
|
configured_decorator = _AutoTunerImplementation(
|
574
|
-
configs=configs, warmup=warmup, rep=rep, timeout=timeout
|
614
|
+
configs=configs, warmup=warmup, rep=rep, timeout=timeout,
|
615
|
+
supply_type=supply_type,
|
616
|
+
ref_prog=ref_prog,
|
617
|
+
supply_prog=supply_prog,
|
618
|
+
rtol=rtol,
|
619
|
+
atol=atol,
|
620
|
+
max_mismatched_ratio=max_mismatched_ratio,
|
621
|
+
skip_check=skip_check,
|
622
|
+
manual_check_prog=manual_check_prog,
|
623
|
+
cache_input_tensors=cache_input_tensors,
|
624
|
+
)
|
575
625
|
return configured_decorator
|
tilelang/engine/phase.py
CHANGED
@@ -142,13 +142,14 @@ def OptimizeForTarget(mod: IRModule, target: Target) -> IRModule:
|
|
142
142
|
mod = tilelang.transform.AnnotateDeviceRegions()(mod)
|
143
143
|
mod = tir.transform.SplitHostDevice()(mod)
|
144
144
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
145
|
+
mod = tir.transform.MergeSharedMemoryAllocations()(mod)
|
146
|
+
# if allow_warp_specialized(pass_ctx=pass_ctx, target=target):
|
147
|
+
# # This is a workaround to avoid the bug in the MergeSharedMemoryAllocations pass
|
148
|
+
# # when warp specialization is enabled, as different warp threads may access different
|
149
|
+
# # buffers, but the liveness analysis is hard because we need to do pipeline.
|
150
|
+
# mod = tir.transform.MergeSharedMemoryAllocations()(mod)
|
151
|
+
# else:
|
152
|
+
# mod = tilelang.transform.MergeSharedMemoryAllocations()(mod)
|
152
153
|
|
153
154
|
mod = tilelang.transform.ThreadSync("shared")(mod)
|
154
155
|
mod = tilelang.transform.ThreadSync("shared.dyn")(mod)
|
@@ -358,6 +358,8 @@ class MatrixCoreIntrinEmitter(object):
|
|
358
358
|
BLOCK_M = block_row_warps * warp_rows
|
359
359
|
BLOCK_N = block_col_warps * warp_cols
|
360
360
|
M_DIM, N_DIM = self.M_DIM, self.N_DIM
|
361
|
+
C_buf_dims = len(C_buf.shape)
|
362
|
+
assert C_buf_dims in {2, 4}, "C_buf should be 2D or 4D"
|
361
363
|
|
362
364
|
# STS
|
363
365
|
# MMA Store must be in simulated instead of TVM Intrins
|
@@ -369,9 +371,15 @@ class MatrixCoreIntrinEmitter(object):
|
|
369
371
|
for i, j in T.grid(warp_rows, warp_cols):
|
370
372
|
for local_id in T.vectorized(local_size_out):
|
371
373
|
row, col = T.meta_var(mfma_store_index_map(tx, local_id))
|
372
|
-
|
373
|
-
|
374
|
-
|
374
|
+
if C_buf_dims == 2:
|
375
|
+
C_buf[(warp_m * warp_rows + i) * M_DIM + row,
|
376
|
+
(warp_n * warp_cols + j) * N_DIM +
|
377
|
+
col] = C_local_buf[i * (warp_cols * local_size_out) +
|
378
|
+
j * local_size_out + local_id]
|
379
|
+
else:
|
380
|
+
C_buf[warp_m * warp_rows + i, warp_n * warp_cols + j, row,
|
381
|
+
col] = C_local_buf[i * warp_cols * local_size_out + j * local_size_out +
|
382
|
+
local_id]
|
375
383
|
|
376
384
|
@T.macro
|
377
385
|
def _warp_stmatrix_global(C_local_buf, C_buf, thread_binding):
|
@@ -432,8 +432,8 @@ class CythonKernelAdapter(BaseKernelAdapter):
|
|
432
432
|
def _convert_torch_func(self) -> Callable:
|
433
433
|
"""Returns a PyTorch-compatible function wrapper for the kernel."""
|
434
434
|
|
435
|
-
def lambda_forward(*args, stream: int = -1):
|
436
|
-
return self.cython_wrapper.forward([*args], stream=stream)
|
435
|
+
def lambda_forward(*args, stream: int = -1, skip_check: bool = False):
|
436
|
+
return self.cython_wrapper.forward([*args], stream=stream, skip_check=skip_check)
|
437
437
|
|
438
438
|
return lambda_forward
|
439
439
|
|
@@ -66,7 +66,7 @@ cdef class CythonKernelWrapper:
|
|
66
66
|
self.buffer_device_map = buffer_device_map
|
67
67
|
return self
|
68
68
|
|
69
|
-
cpdef forward(self, list inputs, int64_t stream = -1):
|
69
|
+
cpdef forward(self, list inputs, int64_t stream = -1, bint skip_check = False):
|
70
70
|
# Validate input dimensions and prepare for kernel execution
|
71
71
|
cdef int total_params = len(self.params)
|
72
72
|
cdef int total_inputs = len(inputs)
|
@@ -135,29 +135,30 @@ cdef class CythonKernelWrapper:
|
|
135
135
|
raise ValueError(f"Unsupported tensor type: {type(tensor)}")
|
136
136
|
|
137
137
|
# Check buffer device
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
(
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
if tensor_list[buffer_idx].
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
138
|
+
if not skip_check:
|
139
|
+
# cdef str tensor_list_device_type = tensor_list[0].device.type
|
140
|
+
if isinstance(tensor_list[0], torch.Tensor):
|
141
|
+
tensor_list_device_type = tensor_list[0].device.type
|
142
|
+
for param, (buffer_idx, device) in self.buffer_device_map.items():
|
143
|
+
if isinstance(tensor_list[buffer_idx], torch.Tensor):
|
144
|
+
tensor_device = tensor_list[buffer_idx].device
|
145
|
+
# Compare device types and indices separately to handle both string and torch.device objects
|
146
|
+
if (tensor_list_device_type != device.type or
|
147
|
+
(tensor_device.index is not None and device.index is not None and tensor_device.index != device.index)):
|
148
|
+
raise ValueError(f"Buffer device mismatch for parameter {param}: expected {device}, got {tensor_device}")
|
149
|
+
|
150
|
+
# Check buffer dtype map
|
151
|
+
for param, (buffer_idx, torch_dtype) in self.buffer_dtype_map.items():
|
152
|
+
if isinstance(tensor_list[buffer_idx], torch.Tensor):
|
153
|
+
if tensor_list[buffer_idx].dtype != torch_dtype:
|
154
|
+
raise ValueError(f"Buffer dtype mismatch for parameter {param}: expected {torch_dtype}, got {tensor_list[buffer_idx].dtype}")
|
155
|
+
|
156
|
+
# Check static shape map
|
157
|
+
for param, (buffer_idx, shape_list) in self.static_shape_map.items():
|
158
|
+
if isinstance(tensor_list[buffer_idx], torch.Tensor):
|
159
|
+
for shape_idx, shape in shape_list:
|
160
|
+
if tensor_list[buffer_idx].shape[shape_idx] != shape:
|
161
|
+
raise ValueError(f"Static shape mismatch for parameter {param}: expected {shape} at index {shape_idx}, got {tensor_list[buffer_idx].shape}")
|
161
162
|
|
162
163
|
# Add dynamic dimension values to kernel arguments
|
163
164
|
for _, (buffer_idx, shape_idx) in self.dynamic_symbolic_map.items():
|
tilelang/lib/libtilelang.so
CHANGED
Binary file
|
Binary file
|
tilelang/lib/libtvm.so
CHANGED
Binary file
|
tilelang/lib/libtvm_runtime.so
CHANGED
Binary file
|
@@ -99,3 +99,16 @@ TL_DEVICE unsigned __pack_half2(const half_t x, const half_t y) {
|
|
99
99
|
unsigned v1 = *((unsigned short *)&y);
|
100
100
|
return (v1 << 16) | v0;
|
101
101
|
}
|
102
|
+
|
103
|
+
// Pack two bfloat16_t values.
|
104
|
+
TL_DEVICE unsigned __pack_bfloat162(const bfloat16_t x, const bfloat16_t y) {
|
105
|
+
unsigned v0 = *((unsigned short *)&x);
|
106
|
+
unsigned v1 = *((unsigned short *)&y);
|
107
|
+
return (v1 << 16) | v0;
|
108
|
+
}
|
109
|
+
|
110
|
+
|
111
|
+
template <typename T1, typename T2>
|
112
|
+
TL_DEVICE void AtomicAdd(T1 *address, T2 val) {
|
113
|
+
atomicAdd(reinterpret_cast<T1 *>(address), static_cast<T1>(val));
|
114
|
+
}
|
@@ -16,3 +16,33 @@ struct __align__(16) fp8_e4_16_t {
|
|
16
16
|
fp8_e4_8_t x;
|
17
17
|
fp8_e4_8_t y;
|
18
18
|
};
|
19
|
+
|
20
|
+
__device__ fp8_e4_4_t make_fp8_e4_4_t(fp8_e4_t x, fp8_e4_t y, fp8_e4_t z,
|
21
|
+
fp8_e4_t w) {
|
22
|
+
// reinterpret the 4 fp8_e4_t values to signed char value and shift
|
23
|
+
signed char x_char = *reinterpret_cast<signed char *>(&x);
|
24
|
+
signed char y_char = *reinterpret_cast<signed char *>(&y);
|
25
|
+
signed char z_char = *reinterpret_cast<signed char *>(&z);
|
26
|
+
signed char w_char = *reinterpret_cast<signed char *>(&w);
|
27
|
+
int res = (w_char << 24) | (z_char << 16) | (y_char << 8) | x_char;
|
28
|
+
return *reinterpret_cast<fp8_e4_4_t *>(&res);
|
29
|
+
}
|
30
|
+
|
31
|
+
__device__ fp8_e4_8_t make_fp8_e4_8_t(fp8_e4_t x, fp8_e4_t y, fp8_e4_t z,
|
32
|
+
fp8_e4_t w, fp8_e4_t v, fp8_e4_t u,
|
33
|
+
fp8_e4_t t, fp8_e4_t s) {
|
34
|
+
signed char x_char = *reinterpret_cast<signed char *>(&x);
|
35
|
+
signed char y_char = *reinterpret_cast<signed char *>(&y);
|
36
|
+
signed char z_char = *reinterpret_cast<signed char *>(&z);
|
37
|
+
signed char w_char = *reinterpret_cast<signed char *>(&w);
|
38
|
+
signed char v_char = *reinterpret_cast<signed char *>(&v);
|
39
|
+
signed char u_char = *reinterpret_cast<signed char *>(&u);
|
40
|
+
signed char t_char = *reinterpret_cast<signed char *>(&t);
|
41
|
+
signed char s_char = *reinterpret_cast<signed char *>(&s);
|
42
|
+
int a = (w_char << 24) | (z_char << 16) | (y_char << 8) | x_char;
|
43
|
+
int b = (s_char << 24) | (t_char << 16) | (u_char << 8) | v_char;
|
44
|
+
fp8_e4_8_t res;
|
45
|
+
res.x = *reinterpret_cast<fp8_e4_4_t *>(&a);
|
46
|
+
res.y = *reinterpret_cast<fp8_e4_4_t *>(&b);
|
47
|
+
return res;
|
48
|
+
}
|
@@ -1,7 +1,7 @@
|
|
1
1
|
tilelang/CMakeLists.txt,sha256=xJhnusYZI4UhD_fzseGH3Tn2BeovUzz3aWUwPq-WU0Y,7010
|
2
2
|
tilelang/LICENSE,sha256=v9fVeAgRKQXc5ySwTns767gj0-dHN9XYPpGURkAVAXs,1127
|
3
3
|
tilelang/README.md,sha256=1RC_2IUBY-p0BR-d2xkNXC8zrva8-U3AVkmCozkssbY,11924
|
4
|
-
tilelang/VERSION,sha256=
|
4
|
+
tilelang/VERSION,sha256=Axg99nX6i4uA5L-33bP_V3uMQL8--B3WbQ83pCXrIfg,13
|
5
5
|
tilelang/__init__.py,sha256=yH0BknCRnFQN-E7d6p1HPNbeY4o3COqG7XzR_EJpbTo,3215
|
6
6
|
tilelang/_ffi_api.py,sha256=D-HfDxx8EZq6qItftg-ejOhpC_smIZLN-pWPVCNX_UM,243
|
7
7
|
tilelang/config.cmake,sha256=370i6N3wwi7-LPGZDBtiiu54UWp39ndD-9lCurLhHwI,14330
|
@@ -6280,7 +6280,7 @@ tilelang/3rdparty/tvm/src/topi/reduction.cc,sha256=vjnvD9lnmpO57_CNXLizVnimXRffm
|
|
6280
6280
|
tilelang/3rdparty/tvm/src/topi/schedule.cc,sha256=pazCd8PkTy57jksqinRZVPvJ18iciI0dJXRA0YRACqI,12522
|
6281
6281
|
tilelang/3rdparty/tvm/src/topi/transform.cc,sha256=j9doFFms3ba_1r-Wt-t9Wq953ze-DJeO9uskDtlsejc,7862
|
6282
6282
|
tilelang/3rdparty/tvm/src/topi/vision.cc,sha256=1c0j7VN6orhaRF980GZY8ynLOT66Jq8SFk0GIEk-nrw,1245
|
6283
|
-
tilelang/autotuner/__init__.py,sha256=
|
6283
|
+
tilelang/autotuner/__init__.py,sha256=eCbbYp0Gz_o8Yinq5N6xV8y4siQ59ay0b3Ilq773i4I,24519
|
6284
6284
|
tilelang/autotuner/param.py,sha256=e_vRYHXjKHEsloJ0vwhC4Peqa2vN640DFS3bkQyCrfQ,11802
|
6285
6285
|
tilelang/cache/__init__.py,sha256=ZNBkLL3ssxtMMq8LFTThreL4fSGYbiVLcyD7YH5bb8U,1869
|
6286
6286
|
tilelang/cache/kernel_cache.py,sha256=VZxu3vnFML-Der37ujZlAgapSitQvFYWeixBgIQrGms,13129
|
@@ -6329,10 +6329,10 @@ tilelang/engine/__init__.py,sha256=8veNHGeKyssdX_3ytZEsu0jKHPlP5ygBMUhXXTkeUHc,2
|
|
6329
6329
|
tilelang/engine/callback.py,sha256=zB_lp86j7jVPXnwSg0LMMWltxmkJQiU0wlxFWK-sxf4,3275
|
6330
6330
|
tilelang/engine/lower.py,sha256=OJX7d_qk6WXA9VDF-fTTKw1q-ZnBkn4tuD9lA4TPdxk,8923
|
6331
6331
|
tilelang/engine/param.py,sha256=5eWc48aao84WIrbtaLuAYGrb3RE1SyiG6hIy8DwlryI,3943
|
6332
|
-
tilelang/engine/phase.py,sha256=
|
6332
|
+
tilelang/engine/phase.py,sha256=vt0BzpkYDsXfSHH2BPqSfGQwhiF6TWm9HwgQVKsr1DQ,7228
|
6333
6333
|
tilelang/intrinsics/__init__.py,sha256=ymvtsKjVY0f_9k-QIMtO4CEh6hEnG7H4NiW3buNLVQg,501
|
6334
6334
|
tilelang/intrinsics/mfma_layout.py,sha256=O5jh8gfILH-ASDss0pvTZhJq2jgmcyHwRPzigxGoGW8,4056
|
6335
|
-
tilelang/intrinsics/mfma_macro_generator.py,sha256=
|
6335
|
+
tilelang/intrinsics/mfma_macro_generator.py,sha256=FMTAgq249F8APp8Ms8rAU-PPCX5Att_9Hu76ZyIvE64,18141
|
6336
6336
|
tilelang/intrinsics/mma_layout.py,sha256=eHFiNKd3zKzNFuRrpZdEQx0apbHWj8Ak6Q3e9_CeDiM,5090
|
6337
6337
|
tilelang/intrinsics/mma_macro_generator.py,sha256=BVx3Bt3K67XSTp-Op425OrPxmtD43jyLre0wY8AeW-w,44478
|
6338
6338
|
tilelang/intrinsics/utils.py,sha256=dbQpWOy0F4rg3WotzHQToPtJgY2BLtRy1CKkSnrs--k,4243
|
@@ -6349,8 +6349,8 @@ tilelang/jit/adapter/wrapper.py,sha256=MJsMJxR-Lg9XLNkgMV_RbhQIuPPC0QAOc94H6JIkM
|
|
6349
6349
|
tilelang/jit/adapter/ctypes/__init__.py,sha256=WA38dJGWPXQbOu_rnU1wgckOiVTMzzvxGEL7x1naYM0,127
|
6350
6350
|
tilelang/jit/adapter/ctypes/adapter.py,sha256=mpqlT2Qh0WPUfWuvdO6fFAAXF2mfqbKUcltCU-M-apE,11365
|
6351
6351
|
tilelang/jit/adapter/cython/__init__.py,sha256=v-6kWB8ktC1MUzsCkyYsDM34asotUIGXopEco5nqUVE,127
|
6352
|
-
tilelang/jit/adapter/cython/adapter.py,sha256=
|
6353
|
-
tilelang/jit/adapter/cython/cython_wrapper.pyx,sha256=
|
6352
|
+
tilelang/jit/adapter/cython/adapter.py,sha256=m_mRDIzWnE-lXlX29KYUiOtbLUvut4ctZU5nlQ0vKoU,19621
|
6353
|
+
tilelang/jit/adapter/cython/cython_wrapper.pyx,sha256=HO704otcqNIV5Q4Et9jyfS4WLK5QwGlyiLv6aZJMbzU,8738
|
6354
6354
|
tilelang/language/__init__.py,sha256=xnBuLhX29CjAqo6B1fiF1ctWTDjYPhW-wim9R0AxVFc,5906
|
6355
6355
|
tilelang/language/allocate.py,sha256=YeZERWoqdZXaYbw58hoJ61VxrD3zsiI1sJIyg6vNy38,2855
|
6356
6356
|
tilelang/language/builtin.py,sha256=sLg88K7qkI3jP2ks6TR3YULlCiU9f_cWJ9c79Pi_M5w,9959
|
@@ -6383,10 +6383,10 @@ tilelang/layout/__init__.py,sha256=F1wr9yBG9GW84h8KWXz-hRJFfqyZuY0EKSrG08KyrWQ,2
|
|
6383
6383
|
tilelang/layout/fragment.py,sha256=zTv9P96lsYi9BWc5pxR4PA2Z5RSDGP7D5uJCiNw7_oc,8445
|
6384
6384
|
tilelang/layout/layout.py,sha256=20CWxz_S8k_WNvWiR4gdIrEsQ36e5bsnOEqmu4zGk_c,4311
|
6385
6385
|
tilelang/layout/swizzle.py,sha256=PMqu_s1sNCh9uo8eDs5qmLKXnDqZwv34GT3H9D4YDO0,438
|
6386
|
-
tilelang/lib/libtilelang.so,sha256=
|
6387
|
-
tilelang/lib/libtilelang_module.so,sha256=
|
6388
|
-
tilelang/lib/libtvm.so,sha256=
|
6389
|
-
tilelang/lib/libtvm_runtime.so,sha256=
|
6386
|
+
tilelang/lib/libtilelang.so,sha256=ZKzH7wCpSuIPThuUIfB1brqjN6lE4IGKTNqfJJNwliI,5040720
|
6387
|
+
tilelang/lib/libtilelang_module.so,sha256=E23iuTGfHqKQOq8mp-L7Gx91zeCnXZLRC-wZhUNDqUs,5040720
|
6388
|
+
tilelang/lib/libtvm.so,sha256=TMs6mrRyGE9_HJ8ZoxE_gBrOgmQX_VplIu0fXeIZ9-o,83982112
|
6389
|
+
tilelang/lib/libtvm_runtime.so,sha256=a2o2Zu3-6wPZZXlcEVeCIjIdJYihgv4fw26aLWnLQ8Y,4794680
|
6390
6390
|
tilelang/math/__init__.py,sha256=JC4fqrU_LV_wDErti-wHNr4j6_mqP1PsK0qqkhaSzRU,209
|
6391
6391
|
tilelang/primitives/__init__.py,sha256=10gQN3QWUFM1nkGXY46QFcWUXxwsKMsVn23JdyFHil4,167
|
6392
6392
|
tilelang/primitives/gemm/__init__.py,sha256=j62ObmbL5Q6m3lSouNBQDk1hZZRnSp4UNNCCaSlKYXU,1658
|
@@ -6416,11 +6416,11 @@ tilelang/src/tl_templates/cuda/gemm_sm90.h,sha256=S3v63snxR_3TEg8LLCoR6cqGVgii8k
|
|
6416
6416
|
tilelang/src/tl_templates/cuda/ldsm.h,sha256=TxCxYVzUK4tvUNVqULCL5HEaAuW9vOv0_-QYmoRFUkM,5053
|
6417
6417
|
tilelang/src/tl_templates/cuda/reduce.h,sha256=U9mKcHSttin1FQ0BohpaP0bHvgPvb3t-czwTuDeK5-8,4394
|
6418
6418
|
tilelang/src/tl_templates/cuda/threadblock_swizzle.h,sha256=GIXQwC1gzwUhnq4CzORHh5hA_QHVfMrOcUeGTy1Fon8,1945
|
6419
|
-
tilelang/src/tl_templates/hip/common.h,sha256=
|
6419
|
+
tilelang/src/tl_templates/hip/common.h,sha256=C4O6p1bStNX3hmvZm8QLiWJphTf-4a8OirZmOEnBhBk,3967
|
6420
6420
|
tilelang/src/tl_templates/hip/copy.h,sha256=fGHkbe4ReXoEtIWrgQ-mlCycaIL65SvNGWK1OJZdUQo,3324
|
6421
6421
|
tilelang/src/tl_templates/hip/debug.h,sha256=9xGr4ka5x_nvY55XwbgTJFFwEnd09ta9jAZwjHyQau0,8231
|
6422
6422
|
tilelang/src/tl_templates/hip/gemm.h,sha256=lYeOjV8OG2oZbcS7ByzOudE7i0FQJ71mrUcImkfhTrg,11610
|
6423
|
-
tilelang/src/tl_templates/hip/hip_fp8.h,sha256=
|
6423
|
+
tilelang/src/tl_templates/hip/hip_fp8.h,sha256=eFYV5OyHI2X5xMNOjv1NLqR2shlKkhiGQWiVEiY6LPs,1903
|
6424
6424
|
tilelang/src/tl_templates/hip/ldsm.h,sha256=gRx_bSdsCsgcVumwUJwOnv4HuHruU2kC9TE9x_jo8k0,106
|
6425
6425
|
tilelang/src/tl_templates/hip/reduce.h,sha256=-VKpG-TNbzPHIqsSReYpqZoM-oXFzIx6fMeBieV26Kc,1372
|
6426
6426
|
tilelang/src/tl_templates/hip/threadblock_swizzle.h,sha256=HPuOUl2Ts76ckY5kVkLlfaK2WyHY0pEXskbXnnefFcA,2055
|
@@ -6437,8 +6437,8 @@ tilelang/utils/deprecated.py,sha256=CiZ9y_76_dZ24SFDdasDiLmibwi6xO2Gdj6WzTWU0Qg,
|
|
6437
6437
|
tilelang/utils/language.py,sha256=KUzUZ8Z2x1np0Hu_MrjWOIcRrVAZHX90li1Xw9fYZXY,3291
|
6438
6438
|
tilelang/utils/target.py,sha256=P-74pdCLWcp2MZMQUoPIFwKF1NZ1QT-L0VroIL8m2to,2486
|
6439
6439
|
tilelang/utils/tensor.py,sha256=SZ4ewoJ-Mq3zg8zIHS7-XLUmYDdlNwh841yUkjnQtNU,12573
|
6440
|
-
tilelang_rocm-0.1.4.
|
6441
|
-
tilelang_rocm-0.1.4.
|
6442
|
-
tilelang_rocm-0.1.4.
|
6443
|
-
tilelang_rocm-0.1.4.
|
6444
|
-
tilelang_rocm-0.1.4.
|
6440
|
+
tilelang_rocm-0.1.4.post12.dist-info/licenses/LICENSE,sha256=v9fVeAgRKQXc5ySwTns767gj0-dHN9XYPpGURkAVAXs,1127
|
6441
|
+
tilelang_rocm-0.1.4.post12.dist-info/METADATA,sha256=NTJMFhxd7258jXSi3h5eBbJNpO0HpAfXviyafU8ouvQ,13076
|
6442
|
+
tilelang_rocm-0.1.4.post12.dist-info/WHEEL,sha256=0-G7woG4LgutcYzUGJCOYFgoh749-FtfhSMeIPLVGS0,104
|
6443
|
+
tilelang_rocm-0.1.4.post12.dist-info/top_level.txt,sha256=qvMq-AYkDVggI-9VIAzCe5CXHl66IEWj7J29-JbuFsI,21
|
6444
|
+
tilelang_rocm-0.1.4.post12.dist-info/RECORD,,
|
File without changes
|
{tilelang_rocm-0.1.4.post10.dist-info → tilelang_rocm-0.1.4.post12.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|