tilelang-rocm 0.1.4.post11-cp310-cp310-manylinux1_x86_64.whl → 0.1.4.post12-cp310-cp310-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tilelang/VERSION CHANGED
@@ -1 +1 @@
- 0.1.4.post11
+ 0.1.4.post12
tilelang/autotuner/__init__.py CHANGED
@@ -167,7 +167,7 @@ class AutoTuner:
                          max_mismatched_ratio: float = 0.01,
                          skip_check: bool = False,
                          manual_check_prog: Callable = None,
-                         cache_input_tensors: bool = True):
+                         cache_input_tensors: bool = False):
         """Set profiling arguments for the auto-tuner.
 
         Args:
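The only behavioral change in this hunk is the default: input tensors are no longer cached and reused across tuning trials unless explicitly requested. A minimal sketch of the call site; `tuner`, `my_kernel`, and `my_ref` are illustrative names, not part of the diff:

    tuner = AutoTuner(my_kernel, configs=configs)
    tuner.set_profile_args(
        ref_prog=my_ref,             # optional reference program for result checking
        cache_input_tensors=False,   # new default: regenerate inputs per candidate config
    )
    # Pass cache_input_tensors=True to restore the old behavior (inputs generated once
    # and shared by every trial), which is faster but risky if a candidate kernel
    # mutates its inputs in place.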
@@ -463,8 +463,26 @@ class _AutoTunerImplementation:
     rep: int = 100
     timeout: int = 100
     configs: Any = None
-
-    def __init__(self, configs: Any, warmup: int = 25, rep: int = 100, timeout: int = 100) -> None:
+    supply_type: tilelang.TensorSupplyType = tilelang.TensorSupplyType.Auto
+    ref_prog: Callable = None
+    supply_prog: Callable = None
+    rtol: float = 1e-2
+    atol: float = 1e-2
+    max_mismatched_ratio: float = 0.01
+    skip_check: bool = False
+    manual_check_prog: Callable = None
+    cache_input_tensors: bool = False
+
+    def __init__(self, configs: Any, warmup: int = 25, rep: int = 100, timeout: int = 100,
+                 supply_type: tilelang.TensorSupplyType = tilelang.TensorSupplyType.Auto,
+                 ref_prog: Callable = None,
+                 supply_prog: Callable = None,
+                 rtol: float = 1e-2,
+                 atol: float = 1e-2,
+                 max_mismatched_ratio: float = 0.01,
+                 skip_check: bool = False,
+                 manual_check_prog: Callable = None,
+                 cache_input_tensors: bool = False) -> None:
         """Initialize the AutoTunerImplementation.
 
         Args:
@@ -509,7 +527,17 @@ class _AutoTunerImplementation:
         def jit_compile(**config_arg):
             return fn(*args, **kwargs, __tune_params=config_arg)
 
-        autotuner = AutoTuner(fn, configs=configs)
+        autotuner = AutoTuner(fn, configs=configs).set_profile_args(
+            supply_type=self.supply_type,
+            ref_prog=self.ref_prog,
+            supply_prog=self.supply_prog,
+            rtol=self.rtol,
+            atol=self.atol,
+            max_mismatched_ratio=self.max_mismatched_ratio,
+            skip_check=self.skip_check,
+            manual_check_prog=self.manual_check_prog,
+            cache_input_tensors=self.cache_input_tensors,
+        )
         autotuner.jit_compile = jit_compile
         autotuner.run = partial(autotuner.run, warmup, rep, timeout)
@@ -525,9 +553,21 @@ def autotune( # This is the new public interface
     func: Union[Callable[_P, _RProg], PrimFunc, None] = None,
     *,  # Indicates subsequent arguments are keyword-only
     configs: Any,
+    # profile arguments
     warmup: int = 25,
     rep: int = 100,
-    timeout: int = 100):
+    timeout: int = 100,
+    # compile arguments
+    supply_type: tilelang.TensorSupplyType = tilelang.TensorSupplyType.Auto,
+    ref_prog: Callable = None,
+    supply_prog: Callable = None,
+    rtol: float = 1e-2,
+    atol: float = 1e-2,
+    max_mismatched_ratio: float = 0.01,
+    skip_check: bool = False,
+    manual_check_prog: Callable = None,
+    cache_input_tensors: bool = False,
+):
     """
     Just-In-Time (JIT) compiler decorator for TileLang functions.
 
@@ -571,5 +611,15 @@ def autotune( # This is the new public interface
     # Create a _AutoTunerImplementation instance with the provided/defaulted arguments.
     # This instance is a decorator that will be applied to the function later.
     configured_decorator = _AutoTunerImplementation(
-        configs=configs, warmup=warmup, rep=rep, timeout=timeout)
+        configs=configs, warmup=warmup, rep=rep, timeout=timeout,
+        supply_type=supply_type,
+        ref_prog=ref_prog,
+        supply_prog=supply_prog,
+        rtol=rtol,
+        atol=atol,
+        max_mismatched_ratio=max_mismatched_ratio,
+        skip_check=skip_check,
+        manual_check_prog=manual_check_prog,
+        cache_input_tensors=cache_input_tensors,
+    )
     return configured_decorator
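Taken together, the profiling and correctness options now flow through the public decorator instead of requiring a separate set_profile_args call. A minimal sketch of the updated interface; the kernel factory, reference program, and config space below are illustrative, not part of the diff:

    import tilelang

    def ref_program(A, B):
        return A @ B  # illustrative correctness reference

    @tilelang.autotune(
        configs=[{"block_M": 64}, {"block_M": 128}],  # illustrative search space
        warmup=25, rep=100, timeout=100,
        ref_prog=ref_program,
        rtol=1e-2, atol=1e-2,
        cache_input_tensors=False,
    )
    def matmul(block_M: int = 64):
        ...  # kernel construction elided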
tilelang/engine/phase.py CHANGED
@@ -142,13 +142,14 @@ def OptimizeForTarget(mod: IRModule, target: Target) -> IRModule:
     mod = tilelang.transform.AnnotateDeviceRegions()(mod)
     mod = tir.transform.SplitHostDevice()(mod)
 
-    if allow_warp_specialized(pass_ctx=pass_ctx, target=target):
-        # This is a workaround to avoid the bug in the MergeSharedMemoryAllocations pass
-        # when warp specialization is enabled, as different warp threads may access different
-        # buffers, but the liveness analysis is hard because we need to do pipeline.
-        mod = tir.transform.MergeSharedMemoryAllocations()(mod)
-    else:
-        mod = tilelang.transform.MergeSharedMemoryAllocations()(mod)
+    mod = tir.transform.MergeSharedMemoryAllocations()(mod)
+    # if allow_warp_specialized(pass_ctx=pass_ctx, target=target):
+    #     # This is a workaround to avoid the bug in the MergeSharedMemoryAllocations pass
+    #     # when warp specialization is enabled, as different warp threads may access different
+    #     # buffers, but the liveness analysis is hard because we need to do pipeline.
+    #     mod = tir.transform.MergeSharedMemoryAllocations()(mod)
+    # else:
+    #     mod = tilelang.transform.MergeSharedMemoryAllocations()(mod)
 
     mod = tilelang.transform.ThreadSync("shared")(mod)
     mod = tilelang.transform.ThreadSync("shared.dyn")(mod)
tilelang/intrinsics/mfma_macro_generator.py CHANGED
@@ -358,6 +358,8 @@ class MatrixCoreIntrinEmitter(object):
         BLOCK_M = block_row_warps * warp_rows
         BLOCK_N = block_col_warps * warp_cols
         M_DIM, N_DIM = self.M_DIM, self.N_DIM
+        C_buf_dims = len(C_buf.shape)
+        assert C_buf_dims in {2, 4}, "C_buf should be 2D or 4D"
 
         # STS
         # MMA Store must be in simulated instead of TVM Intrins
@@ -369,9 +371,15 @@ class MatrixCoreIntrinEmitter(object):
             for i, j in T.grid(warp_rows, warp_cols):
                 for local_id in T.vectorized(local_size_out):
                     row, col = T.meta_var(mfma_store_index_map(tx, local_id))
-                    C_buf[warp_m * warp_rows + i, warp_n * warp_cols + j, row,
-                          col] = C_local_buf[i * warp_cols * local_size_out + j * local_size_out +
-                                             local_id]
+                    if C_buf_dims == 2:
+                        C_buf[(warp_m * warp_rows + i) * M_DIM + row,
+                              (warp_n * warp_cols + j) * N_DIM +
+                              col] = C_local_buf[i * (warp_cols * local_size_out) +
+                                                 j * local_size_out + local_id]
+                    else:
+                        C_buf[warp_m * warp_rows + i, warp_n * warp_cols + j, row,
+                              col] = C_local_buf[i * warp_cols * local_size_out + j * local_size_out +
+                                                 local_id]
 
     @T.macro
     def _warp_stmatrix_global(C_local_buf, C_buf, thread_binding):
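The new 2D branch flattens the two warp-tile axes of the 4D layout into absolute row/column offsets. A quick plain-Python check of the index math, using illustrative values and assuming the usual 16x16 MFMA output tile (M_DIM = N_DIM = 16):

    M_DIM = N_DIM = 16
    warp_m, warp_n, warp_rows, warp_cols = 1, 0, 2, 2
    i, j, row, col = 1, 0, 5, 3

    # 4D layout: C_buf[tile_row, tile_col, row, col]
    four_d = (warp_m * warp_rows + i, warp_n * warp_cols + j, row, col)   # (3, 0, 5, 3)

    # 2D layout: each tile index is scaled by the tile extent and folded in
    two_d = ((warp_m * warp_rows + i) * M_DIM + row,
             (warp_n * warp_cols + j) * N_DIM + col)                      # (53, 3)

    assert two_d == (four_d[0] * M_DIM + four_d[2], four_d[1] * N_DIM + four_d[3])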
tilelang/jit/adapter/cython/adapter.py CHANGED
@@ -432,8 +432,8 @@ class CythonKernelAdapter(BaseKernelAdapter):
     def _convert_torch_func(self) -> Callable:
         """Returns a PyTorch-compatible function wrapper for the kernel."""
 
-        def lambda_forward(*args, stream: int = -1):
-            return self.cython_wrapper.forward([*args], stream=stream)
+        def lambda_forward(*args, stream: int = -1, skip_check: bool = False):
+            return self.cython_wrapper.forward([*args], stream=stream, skip_check=skip_check)
 
         return lambda_forward
 
tilelang/jit/adapter/cython/cython_wrapper.pyx CHANGED
@@ -66,7 +66,7 @@ cdef class CythonKernelWrapper:
         self.buffer_device_map = buffer_device_map
         return self
 
-    cpdef forward(self, list inputs, int64_t stream = -1):
+    cpdef forward(self, list inputs, int64_t stream = -1, bint skip_check = False):
         # Validate input dimensions and prepare for kernel execution
         cdef int total_params = len(self.params)
         cdef int total_inputs = len(inputs)
@@ -135,29 +135,30 @@ cdef class CythonKernelWrapper:
                 raise ValueError(f"Unsupported tensor type: {type(tensor)}")
 
         # Check buffer device
-        # cdef str tensor_list_device_type = tensor_list[0].device.type
-        if isinstance(tensor_list[0], torch.Tensor):
-            tensor_list_device_type = tensor_list[0].device.type
-            for param, (buffer_idx, device) in self.buffer_device_map.items():
-                if isinstance(tensor_list[buffer_idx], torch.Tensor):
-                    tensor_device = tensor_list[buffer_idx].device
-                    # Compare device types and indices separately to handle both string and torch.device objects
-                    if (tensor_list_device_type != device.type or
-                            (tensor_device.index is not None and device.index is not None and tensor_device.index != device.index)):
-                        raise ValueError(f"Buffer device mismatch for parameter {param}: expected {device}, got {tensor_device}")
-
-        # Check buffer dtype map
-        for param, (buffer_idx, torch_dtype) in self.buffer_dtype_map.items():
-            if isinstance(tensor_list[buffer_idx], torch.Tensor):
-                if tensor_list[buffer_idx].dtype != torch_dtype:
-                    raise ValueError(f"Buffer dtype mismatch for parameter {param}: expected {torch_dtype}, got {tensor_list[buffer_idx].dtype}")
-
-        # Check static shape map
-        for param, (buffer_idx, shape_list) in self.static_shape_map.items():
-            if isinstance(tensor_list[buffer_idx], torch.Tensor):
-                for shape_idx, shape in shape_list:
-                    if tensor_list[buffer_idx].shape[shape_idx] != shape:
-                        raise ValueError(f"Static shape mismatch for parameter {param}: expected {shape} at index {shape_idx}, got {tensor_list[buffer_idx].shape}")
+        if not skip_check:
+            # cdef str tensor_list_device_type = tensor_list[0].device.type
+            if isinstance(tensor_list[0], torch.Tensor):
+                tensor_list_device_type = tensor_list[0].device.type
+                for param, (buffer_idx, device) in self.buffer_device_map.items():
+                    if isinstance(tensor_list[buffer_idx], torch.Tensor):
+                        tensor_device = tensor_list[buffer_idx].device
+                        # Compare device types and indices separately to handle both string and torch.device objects
+                        if (tensor_list_device_type != device.type or
+                                (tensor_device.index is not None and device.index is not None and tensor_device.index != device.index)):
+                            raise ValueError(f"Buffer device mismatch for parameter {param}: expected {device}, got {tensor_device}")
+
+            # Check buffer dtype map
+            for param, (buffer_idx, torch_dtype) in self.buffer_dtype_map.items():
+                if isinstance(tensor_list[buffer_idx], torch.Tensor):
+                    if tensor_list[buffer_idx].dtype != torch_dtype:
+                        raise ValueError(f"Buffer dtype mismatch for parameter {param}: expected {torch_dtype}, got {tensor_list[buffer_idx].dtype}")
+
+            # Check static shape map
+            for param, (buffer_idx, shape_list) in self.static_shape_map.items():
+                if isinstance(tensor_list[buffer_idx], torch.Tensor):
+                    for shape_idx, shape in shape_list:
+                        if tensor_list[buffer_idx].shape[shape_idx] != shape:
+                            raise ValueError(f"Static shape mismatch for parameter {param}: expected {shape} at index {shape_idx}, got {tensor_list[buffer_idx].shape}")
 
         # Add dynamic dimension values to kernel arguments
         for _, (buffer_idx, shape_idx) in self.dynamic_symbolic_map.items():
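With the adapter and wrapper changes combined, callers can now skip the per-call device, dtype, and static-shape validation on hot paths. A minimal usage sketch; `kernel`, `a`, and `b` are illustrative names, assuming a compiled TileLang kernel backed by this Cython adapter:

    out = kernel(a, b)                   # default: full validation before launch
    out = kernel(a, b, skip_check=True)  # skip validation; the caller guarantees that
                                         # devices, dtypes, and shapes already match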
tilelang/lib/libtvm.so CHANGED
Binary file
tilelang/src/tl_templates/hip/common.h CHANGED
@@ -99,3 +99,16 @@ TL_DEVICE unsigned __pack_half2(const half_t x, const half_t y) {
   unsigned v1 = *((unsigned short *)&y);
   return (v1 << 16) | v0;
 }
+
+// Pack two bfloat16_t values.
+TL_DEVICE unsigned __pack_bfloat162(const bfloat16_t x, const bfloat16_t y) {
+  unsigned v0 = *((unsigned short *)&x);
+  unsigned v1 = *((unsigned short *)&y);
+  return (v1 << 16) | v0;
+}
+
+
+template <typename T1, typename T2>
+TL_DEVICE void AtomicAdd(T1 *address, T2 val) {
+  atomicAdd(reinterpret_cast<T1 *>(address), static_cast<T1>(val));
+}
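__pack_bfloat162 mirrors __pack_half2: the second value occupies the high 16 bits of the packed 32-bit word. A plain-Python check of that bit layout, using the raw bfloat16 bit patterns of 1.0 (0x3F80) and 2.0 (0x4000):

    def pack_bfloat162(x_bits: int, y_bits: int) -> int:
        # x_bits / y_bits are raw 16-bit bfloat16 patterns
        return ((y_bits & 0xFFFF) << 16) | (x_bits & 0xFFFF)

    assert pack_bfloat162(0x3F80, 0x4000) == 0x40003F80  # x in the low half, y in the high half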
tilelang_rocm-0.1.4.post11.dist-info/METADATA → tilelang_rocm-0.1.4.post12.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tilelang-rocm
-Version: 0.1.4.post11
+Version: 0.1.4.post12
 Summary: A tile level programming language to generate high performance code.
 Home-page: https://github.com/tile-ai/tilelang
 Author: Microsoft Research
tilelang_rocm-0.1.4.post11.dist-info/RECORD → tilelang_rocm-0.1.4.post12.dist-info/RECORD
@@ -1,7 +1,7 @@
 tilelang/CMakeLists.txt,sha256=xJhnusYZI4UhD_fzseGH3Tn2BeovUzz3aWUwPq-WU0Y,7010
 tilelang/LICENSE,sha256=v9fVeAgRKQXc5ySwTns767gj0-dHN9XYPpGURkAVAXs,1127
 tilelang/README.md,sha256=1RC_2IUBY-p0BR-d2xkNXC8zrva8-U3AVkmCozkssbY,11924
-tilelang/VERSION,sha256=ArXa5-BJWz0tYmntP_b1UkmqBWmkVJ5sMUiAaXMSrkA,13
+tilelang/VERSION,sha256=Axg99nX6i4uA5L-33bP_V3uMQL8--B3WbQ83pCXrIfg,13
 tilelang/__init__.py,sha256=yH0BknCRnFQN-E7d6p1HPNbeY4o3COqG7XzR_EJpbTo,3215
 tilelang/_ffi_api.py,sha256=D-HfDxx8EZq6qItftg-ejOhpC_smIZLN-pWPVCNX_UM,243
 tilelang/config.cmake,sha256=370i6N3wwi7-LPGZDBtiiu54UWp39ndD-9lCurLhHwI,14330
@@ -6280,7 +6280,7 @@ tilelang/3rdparty/tvm/src/topi/reduction.cc,sha256=vjnvD9lnmpO57_CNXLizVnimXRffm
 tilelang/3rdparty/tvm/src/topi/schedule.cc,sha256=pazCd8PkTy57jksqinRZVPvJ18iciI0dJXRA0YRACqI,12522
 tilelang/3rdparty/tvm/src/topi/transform.cc,sha256=j9doFFms3ba_1r-Wt-t9Wq953ze-DJeO9uskDtlsejc,7862
 tilelang/3rdparty/tvm/src/topi/vision.cc,sha256=1c0j7VN6orhaRF980GZY8ynLOT66Jq8SFk0GIEk-nrw,1245
-tilelang/autotuner/__init__.py,sha256=7c5xFLUtqUjFUNmmxutr5G69Eotn51dQuDtvhlC1-ks,22446
+tilelang/autotuner/__init__.py,sha256=eCbbYp0Gz_o8Yinq5N6xV8y4siQ59ay0b3Ilq773i4I,24519
 tilelang/autotuner/param.py,sha256=e_vRYHXjKHEsloJ0vwhC4Peqa2vN640DFS3bkQyCrfQ,11802
 tilelang/cache/__init__.py,sha256=ZNBkLL3ssxtMMq8LFTThreL4fSGYbiVLcyD7YH5bb8U,1869
 tilelang/cache/kernel_cache.py,sha256=VZxu3vnFML-Der37ujZlAgapSitQvFYWeixBgIQrGms,13129
@@ -6329,10 +6329,10 @@ tilelang/engine/__init__.py,sha256=8veNHGeKyssdX_3ytZEsu0jKHPlP5ygBMUhXXTkeUHc,2
 tilelang/engine/callback.py,sha256=zB_lp86j7jVPXnwSg0LMMWltxmkJQiU0wlxFWK-sxf4,3275
 tilelang/engine/lower.py,sha256=OJX7d_qk6WXA9VDF-fTTKw1q-ZnBkn4tuD9lA4TPdxk,8923
 tilelang/engine/param.py,sha256=5eWc48aao84WIrbtaLuAYGrb3RE1SyiG6hIy8DwlryI,3943
-tilelang/engine/phase.py,sha256=SPruc1tsC_yFD9q93pilkFmzY3UZ2YJTCRH06YPDI1Q,7154
+tilelang/engine/phase.py,sha256=vt0BzpkYDsXfSHH2BPqSfGQwhiF6TWm9HwgQVKsr1DQ,7228
 tilelang/intrinsics/__init__.py,sha256=ymvtsKjVY0f_9k-QIMtO4CEh6hEnG7H4NiW3buNLVQg,501
 tilelang/intrinsics/mfma_layout.py,sha256=O5jh8gfILH-ASDss0pvTZhJq2jgmcyHwRPzigxGoGW8,4056
-tilelang/intrinsics/mfma_macro_generator.py,sha256=axkPrTN-Lplt0xFMk-ckQu8Qynve-lNl6thQCIwY1Q0,17653
+tilelang/intrinsics/mfma_macro_generator.py,sha256=FMTAgq249F8APp8Ms8rAU-PPCX5Att_9Hu76ZyIvE64,18141
 tilelang/intrinsics/mma_layout.py,sha256=eHFiNKd3zKzNFuRrpZdEQx0apbHWj8Ak6Q3e9_CeDiM,5090
 tilelang/intrinsics/mma_macro_generator.py,sha256=BVx3Bt3K67XSTp-Op425OrPxmtD43jyLre0wY8AeW-w,44478
 tilelang/intrinsics/utils.py,sha256=dbQpWOy0F4rg3WotzHQToPtJgY2BLtRy1CKkSnrs--k,4243
@@ -6349,8 +6349,8 @@ tilelang/jit/adapter/wrapper.py,sha256=MJsMJxR-Lg9XLNkgMV_RbhQIuPPC0QAOc94H6JIkM
 tilelang/jit/adapter/ctypes/__init__.py,sha256=WA38dJGWPXQbOu_rnU1wgckOiVTMzzvxGEL7x1naYM0,127
 tilelang/jit/adapter/ctypes/adapter.py,sha256=mpqlT2Qh0WPUfWuvdO6fFAAXF2mfqbKUcltCU-M-apE,11365
 tilelang/jit/adapter/cython/__init__.py,sha256=v-6kWB8ktC1MUzsCkyYsDM34asotUIGXopEco5nqUVE,127
-tilelang/jit/adapter/cython/adapter.py,sha256=6XNStHYZOANtrGTqh3o0N6-FJbdyiEfCdtQmV3q4-aQ,19572
-tilelang/jit/adapter/cython/cython_wrapper.pyx,sha256=d8tp6MIed4BpiQHLsiDlX4P1GJ2yfrQ8bDnMR9CLXVc,8598
+tilelang/jit/adapter/cython/adapter.py,sha256=m_mRDIzWnE-lXlX29KYUiOtbLUvut4ctZU5nlQ0vKoU,19621
+tilelang/jit/adapter/cython/cython_wrapper.pyx,sha256=HO704otcqNIV5Q4Et9jyfS4WLK5QwGlyiLv6aZJMbzU,8738
 tilelang/language/__init__.py,sha256=xnBuLhX29CjAqo6B1fiF1ctWTDjYPhW-wim9R0AxVFc,5906
 tilelang/language/allocate.py,sha256=YeZERWoqdZXaYbw58hoJ61VxrD3zsiI1sJIyg6vNy38,2855
 tilelang/language/builtin.py,sha256=sLg88K7qkI3jP2ks6TR3YULlCiU9f_cWJ9c79Pi_M5w,9959
@@ -6383,10 +6383,10 @@ tilelang/layout/__init__.py,sha256=F1wr9yBG9GW84h8KWXz-hRJFfqyZuY0EKSrG08KyrWQ,2
 tilelang/layout/fragment.py,sha256=zTv9P96lsYi9BWc5pxR4PA2Z5RSDGP7D5uJCiNw7_oc,8445
 tilelang/layout/layout.py,sha256=20CWxz_S8k_WNvWiR4gdIrEsQ36e5bsnOEqmu4zGk_c,4311
 tilelang/layout/swizzle.py,sha256=PMqu_s1sNCh9uo8eDs5qmLKXnDqZwv34GT3H9D4YDO0,438
-tilelang/lib/libtilelang.so,sha256=XNX_4wZkR2yki7Jc2Q6hxRhhfcf32LoCnuOBX4TLnTU,5040720
-tilelang/lib/libtilelang_module.so,sha256=5urBFc9szW49ORCkvCDgfWavEAQKs9a6wQbuEQ2Bl1s,5040720
-tilelang/lib/libtvm.so,sha256=PK8xrRlNpJ8Pox1pxcVGSpdrfB5T89NaujkfDyvus0o,83982112
-tilelang/lib/libtvm_runtime.so,sha256=JMGIbjqmERUW2RN7KKUxx4DsPnf1RjZIKWXb7Trnrw0,4794680
+tilelang/lib/libtilelang.so,sha256=ZKzH7wCpSuIPThuUIfB1brqjN6lE4IGKTNqfJJNwliI,5040720
+tilelang/lib/libtilelang_module.so,sha256=E23iuTGfHqKQOq8mp-L7Gx91zeCnXZLRC-wZhUNDqUs,5040720
+tilelang/lib/libtvm.so,sha256=TMs6mrRyGE9_HJ8ZoxE_gBrOgmQX_VplIu0fXeIZ9-o,83982112
+tilelang/lib/libtvm_runtime.so,sha256=a2o2Zu3-6wPZZXlcEVeCIjIdJYihgv4fw26aLWnLQ8Y,4794680
 tilelang/math/__init__.py,sha256=JC4fqrU_LV_wDErti-wHNr4j6_mqP1PsK0qqkhaSzRU,209
 tilelang/primitives/__init__.py,sha256=10gQN3QWUFM1nkGXY46QFcWUXxwsKMsVn23JdyFHil4,167
 tilelang/primitives/gemm/__init__.py,sha256=j62ObmbL5Q6m3lSouNBQDk1hZZRnSp4UNNCCaSlKYXU,1658
@@ -6416,7 +6416,7 @@ tilelang/src/tl_templates/cuda/gemm_sm90.h,sha256=S3v63snxR_3TEg8LLCoR6cqGVgii8k
 tilelang/src/tl_templates/cuda/ldsm.h,sha256=TxCxYVzUK4tvUNVqULCL5HEaAuW9vOv0_-QYmoRFUkM,5053
 tilelang/src/tl_templates/cuda/reduce.h,sha256=U9mKcHSttin1FQ0BohpaP0bHvgPvb3t-czwTuDeK5-8,4394
 tilelang/src/tl_templates/cuda/threadblock_swizzle.h,sha256=GIXQwC1gzwUhnq4CzORHh5hA_QHVfMrOcUeGTy1Fon8,1945
-tilelang/src/tl_templates/hip/common.h,sha256=KzMntW2OlABMwYjlxNqKiBDZQXQNrYApeVf8F9548s4,3591
+tilelang/src/tl_templates/hip/common.h,sha256=C4O6p1bStNX3hmvZm8QLiWJphTf-4a8OirZmOEnBhBk,3967
 tilelang/src/tl_templates/hip/copy.h,sha256=fGHkbe4ReXoEtIWrgQ-mlCycaIL65SvNGWK1OJZdUQo,3324
 tilelang/src/tl_templates/hip/debug.h,sha256=9xGr4ka5x_nvY55XwbgTJFFwEnd09ta9jAZwjHyQau0,8231
 tilelang/src/tl_templates/hip/gemm.h,sha256=lYeOjV8OG2oZbcS7ByzOudE7i0FQJ71mrUcImkfhTrg,11610
@@ -6437,8 +6437,8 @@ tilelang/utils/deprecated.py,sha256=CiZ9y_76_dZ24SFDdasDiLmibwi6xO2Gdj6WzTWU0Qg,
 tilelang/utils/language.py,sha256=KUzUZ8Z2x1np0Hu_MrjWOIcRrVAZHX90li1Xw9fYZXY,3291
 tilelang/utils/target.py,sha256=P-74pdCLWcp2MZMQUoPIFwKF1NZ1QT-L0VroIL8m2to,2486
 tilelang/utils/tensor.py,sha256=SZ4ewoJ-Mq3zg8zIHS7-XLUmYDdlNwh841yUkjnQtNU,12573
-tilelang_rocm-0.1.4.post11.dist-info/licenses/LICENSE,sha256=v9fVeAgRKQXc5ySwTns767gj0-dHN9XYPpGURkAVAXs,1127
-tilelang_rocm-0.1.4.post11.dist-info/METADATA,sha256=mj9fK8dUTSpEU776wVQ23WWkbeuwuFtMp1UNaCvilZ8,13076
-tilelang_rocm-0.1.4.post11.dist-info/WHEEL,sha256=0-G7woG4LgutcYzUGJCOYFgoh749-FtfhSMeIPLVGS0,104
-tilelang_rocm-0.1.4.post11.dist-info/top_level.txt,sha256=qvMq-AYkDVggI-9VIAzCe5CXHl66IEWj7J29-JbuFsI,21
-tilelang_rocm-0.1.4.post11.dist-info/RECORD,,
+tilelang_rocm-0.1.4.post12.dist-info/licenses/LICENSE,sha256=v9fVeAgRKQXc5ySwTns767gj0-dHN9XYPpGURkAVAXs,1127
+tilelang_rocm-0.1.4.post12.dist-info/METADATA,sha256=NTJMFhxd7258jXSi3h5eBbJNpO0HpAfXviyafU8ouvQ,13076
+tilelang_rocm-0.1.4.post12.dist-info/WHEEL,sha256=0-G7woG4LgutcYzUGJCOYFgoh749-FtfhSMeIPLVGS0,104
+tilelang_rocm-0.1.4.post12.dist-info/top_level.txt,sha256=qvMq-AYkDVggI-9VIAzCe5CXHl66IEWj7J29-JbuFsI,21
+tilelang_rocm-0.1.4.post12.dist-info/RECORD,,