tritonparse 0.2.4.dev20251007071533__py3-none-any.whl → 0.2.4.dev20251009071511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tritonparse might be problematic. Click here for more details.

@@ -17,6 +17,8 @@ class TritonParseManager:
17
17
  self,
18
18
  enable_trace_launch=False,
19
19
  split_inductor_compilations=True,
20
+ enable_tensor_blob_storage=False,
21
+ tensor_storage_quota=None,
20
22
  **parse_kwargs,
21
23
  ):
22
24
  """
@@ -25,17 +27,28 @@ class TritonParseManager:
25
27
  Args:
26
28
  enable_trace_launch: Whether to enable trace launch
27
29
  split_inductor_compilations: Whether to split inductor compilations in the output
30
+ enable_tensor_blob_storage: Whether to enable tensor blob storage
31
+ tensor_storage_quota: Storage quota in bytes for tensor blobs (default: 100GB)
28
32
  **parse_kwargs: Additional keyword arguments to pass to unified_parse
29
33
  """
30
34
  self.enable_trace_launch = enable_trace_launch
31
35
  self.split_inductor_compilations = split_inductor_compilations
36
+ self.enable_tensor_blob_storage = enable_tensor_blob_storage
37
+ self.tensor_storage_quota = tensor_storage_quota
32
38
  self.parse_kwargs = parse_kwargs
33
39
  self.dir_path = None
34
40
  self.output_link = None
35
41
 
36
42
  def __enter__(self):
37
43
  self.dir_path = createUniqueTempDirectory()
38
- init(self.dir_path, enable_trace_launch=self.enable_trace_launch)
44
+ init_kwargs = {
45
+ "enable_trace_launch": self.enable_trace_launch,
46
+ "enable_tensor_blob_storage": self.enable_tensor_blob_storage,
47
+ }
48
+ if self.tensor_storage_quota is not None:
49
+ init_kwargs["tensor_storage_quota"] = self.tensor_storage_quota
50
+
51
+ init(self.dir_path, **init_kwargs)
39
52
  return self
40
53
 
41
54
  def __exit__(self, exc_type, exc_val, exc_tb):
@@ -3,13 +3,16 @@ This file is automatically generated by TritonParse reproducer.
3
3
  It contains a smallest testing example for a Triton kernel.
4
4
  """
5
5
 
6
+ import gzip
6
7
  import hashlib
7
8
  import importlib
9
+ import io
8
10
  import json
9
11
  import logging
10
12
  import sys
11
13
  from functools import lru_cache
12
14
  from pathlib import Path
15
+ from typing import Union
13
16
 
14
17
  import torch
15
18
 
@@ -42,13 +45,14 @@ def _get_triton_tensor_types():
42
45
  )
43
46
 
44
47
 
45
- def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
48
+ def load_tensor(tensor_file_path: Union[str, Path], device: str = None) -> torch.Tensor:
46
49
  """
47
50
  Load a tensor from its file path and verify its integrity using the hash in the filename.
48
51
 
49
52
  Args:
50
- tensor_file_path (str): Direct path to the tensor .bin file. The filename should be
51
- the hash of the file contents followed by .bin extension.
53
+ tensor_file_path (str | Path): Direct path to the tensor file. Supports both:
54
+ - .bin.gz: gzip-compressed tensor (hash is of uncompressed data)
55
+ - .bin: uncompressed tensor (for backward compatibility)
52
56
  device (str, optional): Device to load the tensor to (e.g., 'cuda:0', 'cpu').
53
57
  If None, keeps the tensor on its original device.
54
58
 
@@ -65,13 +69,26 @@ def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
65
69
  if not blob_path.exists():
66
70
  raise FileNotFoundError(f"Tensor blob not found: {blob_path}")
67
71
 
68
- # Extract expected hash from filename (remove .bin extension)
69
- expected_hash = blob_path.stem
72
+ # Detect compression by file extension
73
+ is_compressed = blob_path.name.endswith(".bin.gz")
70
74
 
71
- # Compute actual hash of file contents
72
- with open(blob_path, "rb") as f:
73
- file_contents = f.read()
74
- computed_hash = hashlib.blake2b(file_contents).hexdigest()
75
+ # Read file contents (decompress if needed)
76
+ try:
77
+ with open(blob_path, "rb") as f:
78
+ file_obj = gzip.GzipFile(fileobj=f, mode="rb") if is_compressed else f
79
+ file_contents = file_obj.read()
80
+ except (OSError, gzip.BadGzipFile) as e:
81
+ if is_compressed:
82
+ raise RuntimeError(f"Failed to decompress gzip file {blob_path}: {str(e)}")
83
+ else:
84
+ raise RuntimeError(f"Failed to read file {blob_path}: {str(e)}")
85
+
86
+ # Extract expected hash from filename
87
+ # abc123.bin.gz -> abc123 or abc123.bin -> abc123
88
+ expected_hash = blob_path.name.removesuffix(".bin.gz" if is_compressed else ".bin")
89
+
90
+ # Compute hash of uncompressed data
91
+ computed_hash = hashlib.blake2b(file_contents).hexdigest()
75
92
 
76
93
  # Verify hash matches filename
77
94
  if computed_hash != expected_hash:
@@ -80,12 +97,11 @@ def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
80
97
  )
81
98
 
82
99
  try:
83
- # Load the tensor using torch.load (tensors are saved with torch.save)
84
- # If device is None, keep tensor on its original device, otherwise move to specified device
85
- tensor = torch.load(blob_path, map_location=device)
100
+ # Load the tensor from memory buffer
101
+ tensor = torch.load(io.BytesIO(file_contents), map_location=device)
86
102
  return tensor
87
103
  except Exception as e:
88
- raise RuntimeError(f"Failed to load tensor from {blob_path}: {str(e)}") from e
104
+ raise RuntimeError(f"Failed to load tensor from {blob_path}: {str(e)}")
89
105
 
90
106
 
91
107
  def create_args_from_json_file(json_path):
@@ -126,6 +142,171 @@ def create_args_from_json(data):
126
142
  return grid, args_dict
127
143
 
128
144
 
145
+ def _apply_stride_and_offset(tensor, shape, stride, storage_offset):
146
+ """
147
+ Apply custom stride and storage offset to a tensor if needed.
148
+
149
+ Args:
150
+ tensor: The base contiguous tensor
151
+ shape: The desired shape
152
+ stride: The desired stride (or None for contiguous)
153
+ storage_offset: The desired storage offset
154
+
155
+ Returns:
156
+ torch.Tensor: The strided tensor view or original tensor if contiguous
157
+ """
158
+ if stride is None:
159
+ return tensor
160
+
161
+ # Calculate expected contiguous stride
162
+ expected_contiguous_stride = []
163
+ s = 1
164
+ for dim_size in reversed(shape):
165
+ expected_contiguous_stride.insert(0, s)
166
+ s *= dim_size
167
+
168
+ # If stride matches contiguous stride and no storage offset, return as-is
169
+ if tuple(stride) == tuple(expected_contiguous_stride) and storage_offset == 0:
170
+ return tensor
171
+
172
+ # Calculate required storage size
173
+ if len(shape) > 0 and len(stride) > 0:
174
+ max_offset = storage_offset
175
+ for dim_stride, dim_size in zip(stride, shape):
176
+ if dim_size > 0:
177
+ max_offset += dim_stride * (dim_size - 1)
178
+ storage_size = max_offset + 1
179
+ else:
180
+ storage_size = storage_offset + 1
181
+
182
+ # Create larger storage tensor and create strided view
183
+ storage_tensor = torch.empty(storage_size, dtype=tensor.dtype, device=tensor.device)
184
+
185
+ # Create strided view
186
+ strided_view = storage_tensor.as_strided(
187
+ size=shape, stride=stride, storage_offset=storage_offset
188
+ )
189
+
190
+ # Copy data from the base tensor into the strided layout
191
+ strided_view.copy_(tensor.flatten()[: strided_view.numel()].view(shape))
192
+
193
+ return strided_view
194
+
195
+
196
+ def _create_base_tensor(arg_info) -> torch.Tensor:
197
+ if arg_info.get("blob_path"):
198
+ return load_tensor(arg_info.get("blob_path"), arg_info.get("device"))
199
+
200
+ # Extract basic tensor properties
201
+ dtype_str = arg_info.get("dtype")
202
+ try:
203
+ torch_dtype = getattr(torch, dtype_str.split(".")[-1])
204
+ except AttributeError:
205
+ logging.error(f"Unsupported dtype: {dtype_str}. Defaulting to float32.")
206
+ torch_dtype = torch.float32
207
+
208
+ shape = arg_info.get("shape", [])
209
+ device = arg_info.get("device", "cpu")
210
+
211
+ # Extract statistical information if available
212
+ mean = arg_info.get("mean")
213
+ std = arg_info.get("std")
214
+ min_val = arg_info.get("min")
215
+ max_val = arg_info.get("max")
216
+ has_stats = (
217
+ mean is not None
218
+ and std is not None
219
+ and min_val is not None
220
+ and max_val is not None
221
+ )
222
+
223
+ if arg_info.get("tensor_capture_error", False):
224
+ logging.error(
225
+ f"Error: Tensor '{arg_info.get('name', '')}' had capture error. Generating random tensor instead."
226
+ )
227
+
228
+ # Use a dummy tensor to check properties of the dtype
229
+ tensor_props = torch.empty(0, dtype=torch_dtype)
230
+
231
+ # Case 1: Floating point types
232
+ if tensor_props.is_floating_point():
233
+ if has_stats:
234
+ # Generate tensor with statistical properties matching original data
235
+ if std == 0 or min_val == max_val:
236
+ # Constant tensor
237
+ return torch.full(shape, mean, dtype=torch_dtype, device=device)
238
+ # Generate normal distribution with mean and std, then clamp to [min, max]
239
+ tensor = torch.randn(shape, dtype=torch.float32, device=device) * std + mean
240
+ tensor = torch.clamp(tensor, min=min_val, max=max_val)
241
+ return tensor.to(torch_dtype)
242
+ else:
243
+ # Fallback to original random generation
244
+ if torch_dtype in [torch.float8_e4m3fn, torch.float8_e5m2]:
245
+ tmp = torch.rand(shape, dtype=torch.float32, device=device)
246
+ return tmp.to(torch_dtype)
247
+ else:
248
+ return torch.empty(shape, dtype=torch_dtype, device=device).random_()
249
+
250
+ # Case 2: Integer types
251
+ elif torch_dtype in [
252
+ torch.int8,
253
+ torch.int16,
254
+ torch.int32,
255
+ torch.int64,
256
+ torch.uint8,
257
+ torch.bool,
258
+ ]:
259
+ if has_stats and torch_dtype != torch.bool:
260
+ # Generate tensor with statistical properties, then round for integers
261
+ if std == 0 or min_val == max_val:
262
+ # Constant tensor
263
+ return torch.full(shape, int(mean), dtype=torch_dtype, device=device)
264
+ tensor = torch.randn(shape, dtype=torch.float32, device=device) * std + mean
265
+ tensor = torch.clamp(tensor, min=min_val, max=max_val)
266
+ return torch.round(tensor).to(torch_dtype)
267
+ else:
268
+ # Fallback to original random generation
269
+ return torch.empty(shape, dtype=torch_dtype, device=device).random_()
270
+
271
+ # Case 3: Complex numbers need special handling
272
+ elif tensor_props.is_complex():
273
+ # Complex types: fallback to original logic for now
274
+ # TODO: Could be improved to use statistical info if available
275
+ float_dtype = torch.float32 if torch_dtype == torch.complex64 else torch.float64
276
+ real_part = torch.rand(shape, dtype=float_dtype, device=device)
277
+ imag_part = torch.rand(shape, dtype=float_dtype, device=device)
278
+ return torch.complex(real_part, imag_part)
279
+
280
+ # Case 4: Handle other unsigned integers (like uint32) which fail with random_()
281
+ elif "uint" in str(torch_dtype):
282
+ if has_stats:
283
+ # Generate tensor with statistical properties for unsigned integers
284
+ if std == 0 or min_val == max_val:
285
+ return torch.full(shape, int(mean), dtype=torch_dtype, device=device)
286
+ tensor = torch.randn(shape, dtype=torch.float32, device=device) * std + mean
287
+ tensor = torch.clamp(tensor, min=min_val, max=max_val)
288
+ return torch.round(tensor).to(torch_dtype)
289
+ else:
290
+ # Fallback to original random generation
291
+ return torch.randint(0, 1000, shape, dtype=torch_dtype, device=device)
292
+
293
+ # Case 5: If we don't know how to handle the type, raise an error
294
+ else:
295
+ raise NotImplementedError(
296
+ f"Random data generation not implemented for dtype: {torch_dtype}"
297
+ )
298
+
299
+
300
def _create_tensor(arg_info) -> torch.Tensor:
    """
    Create a tensor from its JSON description, including stride and offset.

    The data comes from ``_create_base_tensor``; the recorded ``stride`` and
    ``storage_offset`` are then applied by ``_apply_stride_and_offset``.

    Args:
        arg_info: Mapping with the tensor description.

    Returns:
        torch.Tensor: The tensor with the recorded layout applied.
    """
    tensor = _create_base_tensor(arg_info)

    # ``dict.get(key, default)`` only applies the default when the key is
    # absent; an explicit null in the JSON would come through as None and
    # break the stride arithmetic, so normalize those values here.
    shape = arg_info.get("shape") or []
    stride = arg_info.get("stride")
    storage_offset = arg_info.get("storage_offset") or 0
    return _apply_stride_and_offset(tensor, shape, stride, storage_offset)
308
+
309
+
129
310
  def _create_arg_from_info(arg_info):
130
311
  """
131
312
  Recursively construct a kernel argument from its JSON schema.
@@ -150,121 +331,7 @@ def _create_arg_from_info(arg_info):
150
331
  return arg_info.get("value")
151
332
 
152
333
  elif arg_type == "tensor":
153
- if arg_info.get("blob_path"):
154
- return load_tensor(arg_info.get("blob_path"), arg_info.get("device"))
155
-
156
- # Extract basic tensor properties
157
- dtype_str = arg_info.get("dtype")
158
- try:
159
- torch_dtype = getattr(torch, dtype_str.split(".")[-1])
160
- except AttributeError:
161
- logging.error(f"Unsupported dtype: {dtype_str}. Defaulting to float32.")
162
- torch_dtype = torch.float32
163
-
164
- shape = arg_info.get("shape", [])
165
- device = arg_info.get("device", "cpu")
166
-
167
- # Extract statistical information if available
168
- mean = arg_info.get("mean")
169
- std = arg_info.get("std")
170
- min_val = arg_info.get("min")
171
- max_val = arg_info.get("max")
172
- has_stats = (
173
- mean is not None
174
- and std is not None
175
- and min_val is not None
176
- and max_val is not None
177
- )
178
-
179
- if arg_info.get("tensor_capture_error", False):
180
- logging.error(
181
- f"Error: Tensor '{arg_info.get('name', '')}' had capture error. Generating random tensor instead."
182
- )
183
-
184
- # Use a dummy tensor to check properties of the dtype
185
- tensor_props = torch.empty(0, dtype=torch_dtype)
186
-
187
- # Case 1: Floating point types
188
- if tensor_props.is_floating_point():
189
- if has_stats:
190
- # Generate tensor with statistical properties matching original data
191
- if std == 0 or min_val == max_val:
192
- # Constant tensor
193
- return torch.full(shape, mean, dtype=torch_dtype, device=device)
194
- # Generate normal distribution with mean and std, then clamp to [min, max]
195
- tensor = (
196
- torch.randn(shape, dtype=torch.float32, device=device) * std + mean
197
- )
198
- tensor = torch.clamp(tensor, min=min_val, max=max_val)
199
- return tensor.to(torch_dtype)
200
- else:
201
- # Fallback to original random generation
202
- if torch_dtype in [torch.float8_e4m3fn, torch.float8_e5m2]:
203
- tmp = torch.rand(shape, dtype=torch.float32, device=device)
204
- return tmp.to(torch_dtype)
205
- else:
206
- return torch.empty(
207
- shape, dtype=torch_dtype, device=device
208
- ).random_()
209
-
210
- # Case 2: Integer types
211
- elif torch_dtype in [
212
- torch.int8,
213
- torch.int16,
214
- torch.int32,
215
- torch.int64,
216
- torch.uint8,
217
- torch.bool,
218
- ]:
219
- if has_stats and torch_dtype != torch.bool:
220
- # Generate tensor with statistical properties, then round for integers
221
- if std == 0 or min_val == max_val:
222
- # Constant tensor
223
- return torch.full(
224
- shape, int(mean), dtype=torch_dtype, device=device
225
- )
226
- tensor = (
227
- torch.randn(shape, dtype=torch.float32, device=device) * std + mean
228
- )
229
- tensor = torch.clamp(tensor, min=min_val, max=max_val)
230
- return torch.round(tensor).to(torch_dtype)
231
- else:
232
- # Fallback to original random generation
233
- return torch.empty(shape, dtype=torch_dtype, device=device).random_()
234
-
235
- # Case 3: Complex numbers need special handling
236
- elif tensor_props.is_complex():
237
- # Complex types: fallback to original logic for now
238
- # TODO: Could be improved to use statistical info if available
239
- float_dtype = (
240
- torch.float32 if torch_dtype == torch.complex64 else torch.float64
241
- )
242
- real_part = torch.rand(shape, dtype=float_dtype, device=device)
243
- imag_part = torch.rand(shape, dtype=float_dtype, device=device)
244
- return torch.complex(real_part, imag_part)
245
-
246
- # Case 4: Handle other unsigned integers (like uint32) which fail with random_()
247
- elif "uint" in str(torch_dtype):
248
- if has_stats:
249
- # Generate tensor with statistical properties for unsigned integers
250
- if std == 0 or min_val == max_val:
251
- return torch.full(
252
- shape, int(mean), dtype=torch_dtype, device=device
253
- )
254
- tensor = (
255
- torch.randn(shape, dtype=torch.float32, device=device) * std + mean
256
- )
257
- tensor = torch.clamp(tensor, min=min_val, max=max_val)
258
- return torch.round(tensor).to(torch_dtype)
259
- else:
260
- # Fallback to original random generation
261
- return torch.randint(0, 1000, shape, dtype=torch_dtype, device=device)
262
-
263
- # Case 5: If we don't know how to handle the type, raise an error
264
- else:
265
- raise NotImplementedError(
266
- f"Random data generation not implemented for dtype: {torch_dtype}"
267
- )
334
+ return _create_tensor(arg_info)
268
335
 
269
336
  elif arg_type == "triton_kernels.tensor.Tensor":
270
337
  if not TRITON_KERNELS_CUSTOM_TYPES:
@@ -3,6 +3,7 @@
3
3
  import atexit
4
4
  import fnmatch
5
5
  import gzip
6
+ import hashlib
6
7
  import importlib
7
8
  import inspect
8
9
  import io
@@ -11,6 +12,7 @@ import logging
11
12
  import math
12
13
  import os
13
14
  import subprocess
15
+ import tempfile
14
16
  from collections import defaultdict
15
17
  from collections.abc import Mapping
16
18
  from dataclasses import asdict, is_dataclass
@@ -62,8 +64,31 @@ TRITONPARSE_DUMP_SASS = os.getenv("TRITONPARSE_DUMP_SASS", None) in [
62
64
 
63
65
  # The flag to mark if launch is traced. It is used to avoid initializing the launch hook twice.
64
66
  _trace_launch_enabled = False
67
+ # Enable tensor blob storage
68
+ TRITONPARSE_SAVE_TENSOR_BLOBS = os.getenv("TRITONPARSE_SAVE_TENSOR_BLOBS", "0") in [
69
+ "1",
70
+ "true",
71
+ "True",
72
+ ]
73
+ # Tensor size limit in bytes (default 10GB)
74
+ TRITONPARSE_TENSOR_SIZE_LIMIT = int(
75
+ os.getenv("TRITONPARSE_TENSOR_SIZE_LIMIT", str(10 * 1024 * 1024 * 1024))
76
+ )
77
+ # Tensor storage quota in bytes (default 100GB) - tracks compressed size for current run
78
+ TRITONPARSE_TENSOR_STORAGE_QUOTA = int(
79
+ os.getenv("TRITONPARSE_TENSOR_STORAGE_QUOTA", str(100 * 1024 * 1024 * 1024))
80
+ )
81
+ # Compression threshold in bytes (default 1MB) - only compress blobs >= this size
82
+ TRITONPARSE_COMPRESSION_THRESHOLD = 1 * 1024 * 1024
83
+ # Compression level for gzip (0-9, higher = better compression but slower)
84
+ TRITONPARSE_COMPRESSION_LEVEL = 4
85
+ # Log statistics every N saved blobs
86
+ TRITONPARSE_STATS_LOG_FREQUENCY = 100
65
87
 
66
88
  TRITON_TRACE_HANDLER = None
89
+ # Global tensor blob manager instance
90
+ TENSOR_BLOB_MANAGER = None
91
+
67
92
  if importlib.util.find_spec("torch") is not None:
68
93
  TORCH_INSTALLED = True
69
94
  import torch
@@ -72,6 +97,267 @@ else:
72
97
  TORCH_INSTALLED = False
73
98
 
74
99
 
100
class TensorBlobManager:
    """
    Manager for storing tensor data as content-addressed blobs.

    Uses BLAKE2b hashing for content addressing and stores blobs in a two-level
    directory structure to avoid filesystem limitations with large numbers of files.
    Blobs at or above the compression threshold are gzip-compressed; the hash is
    always computed over the uncompressed serialized bytes.
    """

    def __init__(
        self,
        root_dir: Optional[str] = None,
        storage_quota: Optional[int] = None,
    ):
        """
        Args:
            root_dir: Directory under which ``saved_tensors`` is created. When
                omitted, storage stays uninitialized until ``set_root_dir``.
            storage_quota: Maximum number of bytes written in this run; falls
                back to ``TRITONPARSE_TENSOR_STORAGE_QUOTA`` when None.
        """
        self.root_dir = None
        self.hash_to_path_cache = {}  # In-memory cache for hash -> path mapping
        self.compression_threshold = TRITONPARSE_COMPRESSION_THRESHOLD
        self.storage_quota = (
            storage_quota
            if storage_quota is not None
            else TRITONPARSE_TENSOR_STORAGE_QUOTA
        )

        # Resource statistics (tracks current run only)
        self.total_compressed_bytes = 0  # Total on-disk size written in this run
        self.total_uncompressed_bytes = 0  # Total uncompressed size (for ratio)
        self.blob_count = 0  # Total blob references (including dedup hits)
        self.blob_saved_count = 0  # Actual blobs saved (excluding dedup hits)
        self.storage_disabled = False  # Whether storage has been disabled
        self.storage_disabled_reason = None  # Reason for disabling storage

        if root_dir:
            self.set_root_dir(root_dir)

    def set_root_dir(self, root_dir: str):
        """Set the root directory for blob storage and create it if missing."""
        self.root_dir = Path(root_dir) / "saved_tensors"
        self.root_dir.mkdir(parents=True, exist_ok=True)
        log.debug(f"TensorBlobManager: using root directory {self.root_dir}")

    def _compute_hash(self, data: bytes) -> str:
        """Compute BLAKE2b hash of the data as a hex string."""
        return hashlib.blake2b(data).hexdigest()

    def _get_blob_path(self, hash_hex: str, extension: str = ".bin.gz") -> Path:
        """Get the file path for a given hash using two-level directory structure.

        Raises:
            ValueError: If the root directory has not been set.
        """
        if not self.root_dir:
            raise ValueError("Root directory not set")

        # Two-level directory: first 2 chars / full_hash{extension}
        subdir = hash_hex[:2]
        filename = f"{hash_hex}{extension}"
        return (self.root_dir / subdir / filename).resolve()

    def _get_tensor_size_bytes(self, tensor) -> int:
        """Get tensor size in bytes before serialization (0 if not tensor-like)."""
        if hasattr(tensor, "numel") and hasattr(tensor, "element_size"):
            return tensor.numel() * tensor.element_size()
        return 0

    def _log_statistics(self, final: bool = False):
        """Print statistics about tensor blob storage.

        Args:
            final: If True, this is the final statistics message (e.g., when storage is disabled)
        """
        prefix = "📊 Final" if final else "📊"
        compression_ratio = (
            self.total_uncompressed_bytes / self.total_compressed_bytes
            if self.total_compressed_bytes > 0
            else 0.0
        )
        dedup_count = self.blob_count - self.blob_saved_count

        log.info(
            f"{prefix} Tensor blob stats: "
            f"{self.blob_saved_count} saved ({self.blob_count} total, {dedup_count} dedup), "
            f"{self.total_compressed_bytes / 1024**3:.2f}GB compressed "
            f"({self.total_uncompressed_bytes / 1024**3:.2f}GB uncompressed), "
            f"compression ratio: {compression_ratio:.2f}x"
        )

    def _disable_storage(self, reason: str):
        """Disable blob storage and log warning with statistics.

        Args:
            reason: The reason why storage is being disabled
        """
        if not self.storage_disabled:  # Only disable once
            self.storage_disabled = True
            self.storage_disabled_reason = reason
            log.warning(f"⚠️ TENSOR BLOB STORAGE DISABLED: {reason}")
            self._log_statistics(final=True)

    def _write_blob_atomically(self, blob_path: Path, hash_hex: str, data: bytes):
        """Write ``data`` to ``blob_path`` via a temporary file in the same directory.

        The temporary file is moved over the destination with ``Path.replace``,
        which also overwrites an existing file. If writing or moving fails, the
        temporary file is removed before the exception propagates.
        """
        blob_path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(
                mode="wb",
                dir=blob_path.parent,
                prefix=f".tmp_{hash_hex}_",
                delete=False,
            ) as tmp_file:
                tmp_path = Path(tmp_file.name)
                tmp_file.write(data)
            tmp_path.replace(blob_path)
        except BaseException:
            # Best-effort cleanup so failed writes do not leave stray files.
            if tmp_path is not None:
                try:
                    tmp_path.unlink()
                except OSError:
                    pass
            raise

    def save_tensor_blob(self, tensor) -> Dict[str, Any]:
        """
        Save tensor as a blob and return metadata.

        Args:
            tensor: PyTorch tensor to save

        Returns:
            Dictionary with blob metadata or error information:
            - Success: {'tensor_hash': str, 'blob_path': str, 'blob_size': int,
                        'blob_size_uncompressed': int, 'compression': str,
                        'compression_ratio': float, 'serialization_method': str}
            - Dedup hit: Same as success plus 'deduplicated': True (not counted in quota)
            - Error: {'error': str, 'tensor_hash': None}
        """
        # Early exit: Check if storage is disabled
        if self.storage_disabled:
            return {"error": self.storage_disabled_reason, "tensor_hash": None}

        # Early exit: Check if root directory is set
        if not self.root_dir:
            return {"error": "Blob storage not initialized", "tensor_hash": None}

        try:
            # Check tensor size before serialization
            tensor_size = self._get_tensor_size_bytes(tensor)
            if tensor_size > TRITONPARSE_TENSOR_SIZE_LIMIT:
                log.warning(
                    f"Tensor size {tensor_size} bytes exceeds limit {TRITONPARSE_TENSOR_SIZE_LIMIT} bytes, skipping blob storage"
                )
                return {
                    "error": f"Tensor size {tensor_size} bytes exceeds limit {TRITONPARSE_TENSOR_SIZE_LIMIT} bytes",
                    "tensor_hash": None,
                }

            if not TORCH_INSTALLED:
                return {
                    "error": "PyTorch not available for tensor serialization",
                    "tensor_hash": None,
                }

            # Serialize tensor using torch.save (moved to CPU first)
            buffer = io.BytesIO()
            torch.save(tensor.cpu(), buffer)
            blob_data = buffer.getvalue()
            uncompressed_size = len(blob_data)

            # Compute hash on uncompressed data for content addressing
            hash_hex = self._compute_hash(blob_data)

            # Check for deduplication (before compression to save work)
            if hash_hex in self.hash_to_path_cache:
                blob_path = self.hash_to_path_cache[hash_hex]
                try:
                    # stat() doubles as an existence check for the cached file
                    disk_size = blob_path.stat().st_size
                except OSError:
                    # File was deleted or inaccessible - remove from cache and continue to save
                    log.debug(
                        f"Cached blob file no longer exists: {blob_path}, will re-save"
                    )
                    self.hash_to_path_cache.pop(hash_hex, None)
                else:
                    compression = (
                        "gzip" if str(blob_path).endswith(".bin.gz") else "none"
                    )
                    compression_ratio = uncompressed_size / max(1, disk_size)

                    # Deduplication hit - increment count but don't add to quota
                    self.blob_count += 1

                    return {
                        "tensor_hash": hash_hex,
                        "blob_path": str(blob_path),
                        "blob_size": disk_size,
                        "blob_size_uncompressed": uncompressed_size,
                        "compression": compression,
                        "compression_ratio": compression_ratio,
                        "serialization_method": "torch_save",
                        "deduplicated": True,
                    }

            # Decide whether to compress based on size threshold
            if uncompressed_size >= self.compression_threshold:
                data_to_write = gzip.compress(
                    blob_data, compresslevel=TRITONPARSE_COMPRESSION_LEVEL
                )
                file_extension = ".bin.gz"
                compression = "gzip"
            else:
                # Don't compress small files (overhead not worth it)
                data_to_write = blob_data
                file_extension = ".bin"
                compression = "none"

            disk_size = len(data_to_write)

            # Check quota BEFORE writing
            if self.total_compressed_bytes + disk_size > self.storage_quota:
                self._disable_storage(
                    f"Storage quota would be exceeded: "
                    f"{(self.total_compressed_bytes + disk_size) / 1024**3:.2f}GB > "
                    f"{self.storage_quota / 1024**3:.2f}GB limit"
                )
                return {"error": self.storage_disabled_reason, "tensor_hash": None}

            # Create blob file path with appropriate extension and write it
            blob_path = self._get_blob_path(hash_hex, extension=file_extension)
            self._write_blob_atomically(blob_path, hash_hex, data_to_write)

            # Update cache and statistics
            self.hash_to_path_cache[hash_hex] = blob_path
            self.total_compressed_bytes += disk_size
            self.total_uncompressed_bytes += uncompressed_size
            self.blob_count += 1
            self.blob_saved_count += 1

            # Log progress periodically
            if self.blob_saved_count % TRITONPARSE_STATS_LOG_FREQUENCY == 0:
                self._log_statistics()

            log.debug(
                f"Saved tensor blob: {hash_hex} -> {blob_path} ({disk_size} bytes, compression={compression})"
            )

            compression_ratio = uncompressed_size / max(1, disk_size)

            return {
                "tensor_hash": hash_hex,
                "blob_path": str(blob_path),
                "blob_size": disk_size,
                "blob_size_uncompressed": uncompressed_size,
                "compression": compression,
                "compression_ratio": compression_ratio,
                "serialization_method": "torch_save",
            }

        except OSError as e:
            # Disk full, permission errors, etc. - disable storage to avoid repeated failures
            error_msg = f"Failed to save tensor blob (I/O error): {str(e)}"
            log.error(error_msg)
            self._disable_storage(error_msg)
            return {"error": error_msg, "tensor_hash": None}
        except Exception as e:
            # Other unexpected errors - log but don't disable storage
            error_msg = f"Failed to save tensor blob: {str(e)}"
            log.error(error_msg)
            return {"error": error_msg, "tensor_hash": None}
359
+
360
+
75
361
  class TritonLogRecord(logging.LogRecord):
76
362
  """
77
363
  Custom LogRecord class for structured logging of Triton operations.
@@ -283,6 +569,11 @@ def _log_torch_tensor_info(tensor_value):
283
569
  except (RuntimeError, ValueError, TypeError) as e:
284
570
  log.error(f"Unable to compute tensor statistics: {e}")
285
571
  arg_info["tensor_capture_error"] = str(e)
572
+
573
+ # Add tensor blob storage if enabled
574
+ if TRITONPARSE_SAVE_TENSOR_BLOBS and TENSOR_BLOB_MANAGER is not None:
575
+ blob_info = TENSOR_BLOB_MANAGER.save_tensor_blob(tensor_value)
576
+ arg_info.update(blob_info)
286
577
  return arg_info
287
578
 
288
579
 
@@ -726,7 +1017,7 @@ def init_logs():
726
1017
  DEBUG:tritonparse_trace:
727
1018
  lines by blocking propagation to the root logger.
728
1019
  """
729
- global TRITON_TRACE_HANDLER, triton_trace_folder
1020
+ global TRITON_TRACE_HANDLER, triton_trace_folder, TENSOR_BLOB_MANAGER
730
1021
 
731
1022
  # Basic logger settings (safe to run on every call)
732
1023
  triton_trace_log.setLevel(logging.DEBUG)
@@ -752,6 +1043,16 @@ def init_logs():
752
1043
  TRITON_TRACE_HANDLER.setFormatter(TritonJsonFormatter())
753
1044
  triton_trace_log.addHandler(TRITON_TRACE_HANDLER)
754
1045
 
1046
+ # Initialize tensor blob manager if enabled
1047
+ if TRITONPARSE_SAVE_TENSOR_BLOBS and root_dir:
1048
+ if TENSOR_BLOB_MANAGER is None:
1049
+ TENSOR_BLOB_MANAGER = TensorBlobManager(
1050
+ root_dir=root_dir, storage_quota=TRITONPARSE_TENSOR_STORAGE_QUOTA
1051
+ )
1052
+ elif TENSOR_BLOB_MANAGER.root_dir is None:
1053
+ # Update root_dir if it wasn't set during initialization
1054
+ TENSOR_BLOB_MANAGER.set_root_dir(root_dir)
1055
+
755
1056
 
756
1057
  def trace_structured_triton(
757
1058
  name: str,
@@ -1153,6 +1454,8 @@ def init(
1153
1454
  enable_trace_launch: bool = False,
1154
1455
  enable_more_tensor_information: bool = False,
1155
1456
  enable_sass_dump: Optional[bool] = False,
1457
+ enable_tensor_blob_storage: bool = False,
1458
+ tensor_storage_quota: Optional[int] = None,
1156
1459
  ):
1157
1460
  """
1158
1461
  This function is a wrapper around init_basic() that also sets up the compilation listener. Its arguments have higher priority than the environment variables for same settings.
@@ -1163,9 +1466,14 @@ def init(
1163
1466
  enable_more_tensor_information (bool): Whether to enable more tensor information logging.
1164
1467
  It only works when enable_trace_launch/TRITON_TRACE_LAUNCH is True.
1165
1468
  enable_sass_dump (Optional[bool]): Whether to enable SASS dumping.
1469
+ enable_tensor_blob_storage (bool): Whether to enable tensor blob storage.
1470
+ tensor_storage_quota (Optional[int]): Storage quota in bytes for tensor blobs (default: 100GB).
1166
1471
  """
1167
1472
  global TRITON_TRACE_LAUNCH, TRITONPARSE_MORE_TENSOR_INFORMATION
1168
1473
  global TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOK, TRITONPARSE_DUMP_SASS
1474
+ global TRITONPARSE_SAVE_TENSOR_BLOBS, TRITONPARSE_TENSOR_STORAGE_QUOTA
1475
+
1476
+ # Set global flags BEFORE calling init_basic, so init_logs() can see them
1169
1477
  if enable_trace_launch:
1170
1478
  TRITON_TRACE_LAUNCH = True
1171
1479
  TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOK = True
@@ -1173,6 +1481,12 @@ def init(
1173
1481
  TRITONPARSE_MORE_TENSOR_INFORMATION = True
1174
1482
  if enable_sass_dump:
1175
1483
  TRITONPARSE_DUMP_SASS = True
1484
+ if enable_tensor_blob_storage:
1485
+ TRITONPARSE_SAVE_TENSOR_BLOBS = True
1486
+
1487
+ # Set the quota in global var for TensorBlobManager creation in init_logs()
1488
+ if tensor_storage_quota is not None:
1489
+ TRITONPARSE_TENSOR_STORAGE_QUOTA = tensor_storage_quota
1176
1490
 
1177
1491
  init_basic(trace_folder)
1178
1492
  from triton import knobs
@@ -1202,7 +1516,7 @@ def clear_logging_config():
1202
1516
  """
1203
1517
  global TRITON_TRACE_HANDLER, triton_trace_folder, _KERNEL_ALLOWLIST_PATTERNS
1204
1518
  global _trace_launch_enabled
1205
-
1519
+ global TENSOR_BLOB_MANAGER
1206
1520
  # 1. Clean up the log handler
1207
1521
  if TRITON_TRACE_HANDLER is not None:
1208
1522
  if TRITON_TRACE_HANDLER in triton_trace_log.handlers:
@@ -1215,7 +1529,10 @@ def clear_logging_config():
1215
1529
  _KERNEL_ALLOWLIST_PATTERNS = None
1216
1530
  _trace_launch_enabled = False
1217
1531
 
1218
- # 3. Reset Triton knobs
1532
+ # 3. Reset tensor blob manager (only TENSOR_BLOB_MANAGER is cleared in this step)
1533
+ TENSOR_BLOB_MANAGER = None
1534
+
1535
+ # 4. Reset Triton knobs
1219
1536
  # Check if triton was actually imported and used
1220
1537
  from triton import knobs
1221
1538
 
@@ -6,19 +6,23 @@ import tritonparse.tools.load_tensor as load_tensor
6
6
  tensor = load_tensor.load_tensor(tensor_file_path, device)
7
7
  """
8
8
 
9
+ import gzip
9
10
  import hashlib
11
+ import io
10
12
  from pathlib import Path
13
+ from typing import Union
11
14
 
12
15
  import torch
13
16
 
14
17
 
15
- def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
18
+ def load_tensor(tensor_file_path: Union[str, Path], device: str = None) -> torch.Tensor:
16
19
  """
17
20
  Load a tensor from its file path and verify its integrity using the hash in the filename.
18
21
 
19
22
  Args:
20
- tensor_file_path (str): Direct path to the tensor .bin file. The filename should be
21
- the hash of the file contents followed by .bin extension.
23
+ tensor_file_path (str | Path): Direct path to the tensor file. Supports both:
24
+ - .bin.gz: gzip-compressed tensor (hash is of uncompressed data)
25
+ - .bin: uncompressed tensor (for backward compatibility)
22
26
  device (str, optional): Device to load the tensor to (e.g., 'cuda:0', 'cpu').
23
27
  If None, keeps the tensor on its original device.
24
28
 
@@ -35,13 +39,26 @@ def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
35
39
  if not blob_path.exists():
36
40
  raise FileNotFoundError(f"Tensor blob not found: {blob_path}")
37
41
 
38
- # Extract expected hash from filename (remove .bin extension)
39
- expected_hash = blob_path.stem
42
+ # Detect compression by file extension
43
+ is_compressed = blob_path.name.endswith(".bin.gz")
40
44
 
41
- # Compute actual hash of file contents
42
- with open(blob_path, "rb") as f:
43
- file_contents = f.read()
44
- computed_hash = hashlib.blake2b(file_contents).hexdigest()
45
+ # Read file contents (decompress if needed)
46
+ try:
47
+ with open(blob_path, "rb") as f:
48
+ file_obj = gzip.GzipFile(fileobj=f, mode="rb") if is_compressed else f
49
+ file_contents = file_obj.read()
50
+ except (OSError, gzip.BadGzipFile) as e:
51
+ if is_compressed:
52
+ raise RuntimeError(f"Failed to decompress gzip file {blob_path}: {str(e)}")
53
+ else:
54
+ raise RuntimeError(f"Failed to read file {blob_path}: {str(e)}")
55
+
56
+ # Extract expected hash from filename
57
+ # abc123.bin.gz -> abc123 or abc123.bin -> abc123
58
+ expected_hash = blob_path.name.removesuffix(".bin.gz" if is_compressed else ".bin")
59
+
60
+ # Compute hash of uncompressed data
61
+ computed_hash = hashlib.blake2b(file_contents).hexdigest()
45
62
 
46
63
  # Verify hash matches filename
47
64
  if computed_hash != expected_hash:
@@ -50,9 +67,8 @@ def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
50
67
  )
51
68
 
52
69
  try:
53
- # Load the tensor using torch.load (tensors are saved with torch.save)
54
- # If device is None, keep tensor on its original device, otherwise move to specified device
55
- tensor = torch.load(blob_path, map_location=device)
70
+ # Load the tensor from memory buffer
71
+ tensor = torch.load(io.BytesIO(file_contents), map_location=device)
56
72
  return tensor
57
73
  except Exception as e:
58
74
  raise RuntimeError(f"Failed to load tensor from {blob_path}: {str(e)}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tritonparse
3
- Version: 0.2.4.dev20251007071533
3
+ Version: 0.2.4.dev20251009071511
4
4
  Summary: TritonParse: A Compiler Tracer, Visualizer, and mini-Reproducer Generator for Triton Kernels
5
5
  Author-email: Yueming Hao <yhao@meta.com>
6
6
  License-Expression: BSD-3-Clause
@@ -27,13 +27,27 @@ Dynamic: license-file
27
27
 
28
28
  ## ✨ Key Features
29
29
 
30
- - **🚀 Launch Difference Analysis** - Automatically detect and visualize variations in kernel launch parameters, helping you pinpoint performance bottlenecks and debug launch configurations.
31
- - **🔍 Interactive Visualization** - Explore Triton kernels with detailed metadata and stack traces
32
- - **📊 Multi-format IR Support** - View TTGIR, TTIR, LLIR, PTX, and AMDGCN in one place
33
- - **🔄 Side-by-side Comparison** - Compare IR stages with synchronized highlighting
34
- - **📝 Structured Logging** - Capture detailed compilation and launch events with source mapping
35
- - **🌐 Ready-to-use Interface** - No installation required, works in your browser
36
- - **🔒 Privacy-first** - All processing happens locally in your browser, no data uploaded
30
+ ### 🔍 Visualization & Analysis
31
+ - **🚀 Launch Difference Analysis** - Detect and visualize kernel launch parameter variations
32
+ - **📊 IR Code View** - Side-by-side IR viewing with synchronized highlighting and line mapping
33
+ - **🔄 File Diff View** - Compare kernels across different trace files side-by-side
34
+ - **📝 Multi-format IR Support** - View TTGIR, TTIR, LLIR, PTX, and AMDGCN
35
+ - **🎯 Interactive Code Views** - Click-to-highlight corresponding lines across IR stages
36
+
37
+ ### 🔧 Reproducer & Debugging Tools
38
+ - **🔄 Standalone Script Generation** - Extract any kernel into a self-contained Python script
39
+ - **💾 Tensor Data Reconstruction** - Preserve actual tensor data or use statistical approximation
40
+ - **🎯 Custom Templates** - Flexible reproducer templates for different workflows
41
+ - **🐛 Bug Isolation** - Share reproducible test cases for debugging and collaboration
42
+
43
+ ### 📊 Structured Logging & Analysis
44
+ - **📝 Compilation & Launch Tracing** - Capture detailed events with source mapping
45
+ - **🔍 Stack Trace Integration** - Full Python stack traces for debugging
46
+ - **📈 Metadata Extraction** - Comprehensive kernel statistics
47
+
48
+ ### 🛠️ Developer Tools
49
+ - **🌐 Browser-based Interface** - No installation required, works in your browser
50
+ - **🔒 Privacy-first** - All processing happens locally, no data uploaded
37
51
 
38
52
  ## 🚀 Quick Start
39
53
 
@@ -41,22 +55,22 @@ Dynamic: license-file
41
55
 
42
56
  ```python
43
57
  import tritonparse.structured_logging
58
+ import tritonparse.utils
44
59
 
45
- # Initialize logging with launch tracing enabled
60
+ # Initialize logging
46
61
  tritonparse.structured_logging.init("./logs/", enable_trace_launch=True)
47
62
 
48
63
  # Your Triton/PyTorch code here
49
64
  # ... your kernels ...
50
65
 
51
66
  # Parse and generate trace files
52
- import tritonparse.utils
53
- tritonparse.utils.unified_parse("./logs/")
67
+ tritonparse.utils.unified_parse("./logs/", out="./parsed_output")
54
68
  ```
55
- The example terminal output is:
56
- ```bash
57
- tritonparse log file list: /tmp/tmp1gan7zky/log_file_list.json
58
- INFO:tritonparse:Copying parsed logs from /tmp/tmp1gan7zky to /scratch/findhao/tritonparse/tests/parsed_output
59
69
 
70
+ <details>
71
+ <summary>📝 Example output (click to expand)</summary>
72
+
73
+ ```bash
60
74
  ================================================================================
61
75
  📁 TRITONPARSE PARSING RESULTS
62
76
  ================================================================================
@@ -64,13 +78,13 @@ INFO:tritonparse:Copying parsed logs from /tmp/tmp1gan7zky to /scratch/findhao/t
64
78
  📊 Total files generated: 2
65
79
 
66
80
  📄 Generated files:
67
- --------------------------------------------------
68
81
  1. 📝 dedicated_log_triton_trace_findhao__mapped.ndjson.gz (7.2KB)
69
82
  2. 📝 log_file_list.json (181B)
70
83
  ================================================================================
71
84
  ✅ Parsing completed successfully!
72
85
  ================================================================================
73
86
  ```
87
+ </details>
74
88
 
75
89
  ### 2. Visualize Results
76
90
 
@@ -78,6 +92,41 @@ INFO:tritonparse:Copying parsed logs from /tmp/tmp1gan7zky to /scratch/findhao/t
78
92
 
79
93
  > **🔒 Privacy Note**: Your trace files are processed entirely in your browser - nothing is uploaded to any server!
80
94
 
95
+ ### 3. Generate Reproducers (Optional)
96
+
97
+ Extract any kernel into a standalone, executable Python script for debugging or testing:
98
+
99
+ ```bash
100
+ # Generate reproducer from first launch event
101
+ tritonparse reproduce ./parsed_output/trace.ndjson.gz --line 2 --out-dir repro_output
102
+
103
+ # Run the generated reproducer
104
+ cd repro_output/<kernel_name>/
105
+ python repro_*.py
106
+ ```
107
+
108
+ **Python API:**
109
+ ```python
110
+ from tritonparse.reproducer.orchestrator import reproduce
111
+
112
+ result = reproduce(
113
+ input_path="./parsed_output/trace.ndjson.gz",
114
+ line_index=1, # Which launch event (1-based)
115
+ out_dir="repro_output"
116
+ )
117
+ ```
118
+
119
+ <details>
120
+ <summary>🎯 Common Reproducer Use Cases (click to expand)</summary>
121
+
122
+ - **🐛 Bug Isolation**: Extract a failing kernel into a minimal standalone script
123
+ - **⚡ Performance Testing**: Benchmark specific kernels without running the full application
124
+ - **🤝 Team Collaboration**: Share reproducible test cases with colleagues or in bug reports
125
+ - **📊 Regression Testing**: Compare kernel behavior and performance across different versions
126
+ - **🔍 Deep Debugging**: Modify and experiment with kernel parameters in isolation
127
+
128
+ </details>
129
+
81
130
  ## 🛠️ Installation
82
131
 
83
132
  **For basic usage (trace generation):**
@@ -106,18 +155,13 @@ pip install triton
106
155
 
107
156
  | 📖 Guide | Description |
108
157
  |----------|-------------|
109
- | **[🏠 Wiki Home](https://github.com/meta-pytorch/tritonparse/wiki)** | Complete documentation and navigation |
110
- | **[📦 Installation Guide](https://github.com/meta-pytorch/tritonparse/wiki/01.-Installation)** | Detailed setup for all scenarios |
111
- | **[📋 Usage Guide](https://github.com/meta-pytorch/tritonparse/wiki/02.-Usage-Guide)** | Complete workflow and examples |
112
- | **[🌐 Web Interface Guide](https://github.com/meta-pytorch/tritonparse/wiki/03.-Web-Interface-Guide)** | Master the visualization interface |
113
- | **[🔧 Developer Guide](https://github.com/meta-pytorch/tritonparse/wiki/04.-Developer-Guide)** | Contributing and development setup |
114
- | **[ FAQ](https://github.com/meta-pytorch/tritonparse/wiki/06.-FAQ)** | Frequently asked questions |
115
-
116
- ## 🛠️ Tech Stack
117
-
118
- - **Frontend**: React 19, TypeScript, Vite, Tailwind CSS, Monaco Editor
119
- - **Backend**: Python with Triton integration, structured logging
120
- - **Deployment**: GitHub Pages, automatic deployment
158
+ | **[🏠 Wiki Home](https://github.com/meta-pytorch/tritonparse/wiki)** | Complete documentation and quick navigation |
159
+ | **[📦 Installation](https://github.com/meta-pytorch/tritonparse/wiki/01.-Installation)** | Setup guide for all scenarios |
160
+ | **[📋 Usage Guide](https://github.com/meta-pytorch/tritonparse/wiki/02.-Usage-Guide)** | Complete workflow, reproducer generation, and examples |
161
+ | **[🌐 Web Interface](https://github.com/meta-pytorch/tritonparse/wiki/03.-Web-Interface-Guide)** | Master the visualization interface |
162
+ | **[🔧 Developer Guide](https://github.com/meta-pytorch/tritonparse/wiki/04.-Developer-Guide)** | Contributing and architecture overview |
163
+ | **[📝 Code Formatting](https://github.com/meta-pytorch/tritonparse/wiki/05.-Code-Formatting)** | Formatting standards and tools |
164
+ | **[❓ FAQ](https://github.com/meta-pytorch/tritonparse/wiki/06.-FAQ)** | Quick answers and troubleshooting |
121
165
 
122
166
  ## 📊 Understanding Triton Compilation
123
167
 
@@ -130,9 +174,10 @@ Each stage can be inspected and compared to understand optimization transformati
130
174
  ## 🤝 Contributing
131
175
 
132
176
  We welcome contributions! Please see our **[Developer Guide](https://github.com/meta-pytorch/tritonparse/wiki/04.-Developer-Guide)** for:
133
- - Development setup
134
- - Code formatting standards
135
- - Pull request process
177
+ - Development setup and prerequisites
178
+ - Code formatting standards (**[Formatting Guide](https://github.com/meta-pytorch/tritonparse/wiki/05.-Code-Formatting)**)
179
+ - Pull request and code review process
180
+ - Testing guidelines
136
181
  - Architecture overview
137
182
 
138
183
  ## 📞 Support & Community
@@ -2,7 +2,7 @@ tritonparse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  tritonparse/__main__.py,sha256=wu5N2wk8mvBgyvr2ghmQf4prezAe0_i-p123VVreyYc,62
3
3
  tritonparse/cli.py,sha256=ODfLvbMrrRm5JX6-3XDCu9f59gkJ00sg0R_uzNoVNgw,2471
4
4
  tritonparse/common.py,sha256=9coQbzpyHWAAdv6lx2YQTualiIH49ULJuZTA7VB_V7A,13946
5
- tritonparse/context_manager.py,sha256=M-zRZX8PX8onqBdDeIJ37VGVXmKZ_dFMC6eeZQchyNw,1583
5
+ tritonparse/context_manager.py,sha256=MqAWI8uD1uCeCbmkcEWPejc5_IeY---BdajhsSR9T_E,2221
6
6
  tritonparse/event_diff.py,sha256=yOD6uNxLJroatfx2nEGr-erw24ObOrHU9P6V5pzr8do,4907
7
7
  tritonparse/extract_source_mappings.py,sha256=Z6UxFj2cCE5NCWLQTYPKqUpLfbYhqP8xgCl5mvud9KI,1451
8
8
  tritonparse/ir_parser.py,sha256=1j1tP9jpUN7wH3e01bKUkUPgTMlNXUdp8LKRCC-WTro,9324
@@ -10,7 +10,7 @@ tritonparse/mapper.py,sha256=prrczfi13P7Aa042OrEBsmRF1HW3jDhwxicANgPkWIM,4150
10
10
  tritonparse/shared_vars.py,sha256=fCAW24mx9nENYoNbTy-tZjiN5-w6oGTO_av-Pw1J1TY,653
11
11
  tritonparse/source_type.py,sha256=nmYEQS8rfkIN9BhNhQbkmEvKnvS-3zAxRGLY4TaZdi8,1676
12
12
  tritonparse/sourcemap_utils.py,sha256=qsQmTDuEe9yuUVyxSHRbjTR38gi0hvJEijnPkrJVAV4,2037
13
- tritonparse/structured_logging.py,sha256=y5FKi3mA-iR4SyzCUOfxlZnDpqRhCQC2rKFh2Nrb3kE,46869
13
+ tritonparse/structured_logging.py,sha256=L1xkkCx8Jr9YQbM0Kgtf2g6L3aWMkYOEeFFEOSo8Lkk,60306
14
14
  tritonparse/tp_logger.py,sha256=vXzY7hMDmVnRBGBhIjFZe3nHZzG5NKKPONGUszJhGgU,242
15
15
  tritonparse/trace_processor.py,sha256=brQBt26jdB6-quJXP5-warp2j31JSjOOFJa5ayiUZ5k,12963
16
16
  tritonparse/utils.py,sha256=Jnlptcd79llSDev-_1XyyOnv2izUqv0PEL74A8GF2tc,4565
@@ -21,18 +21,18 @@ tritonparse/reproducer/placeholder_replacer.py,sha256=x9ddhIXVcYoEf6mBpByECPdZeA
21
21
  tritonparse/reproducer/utils.py,sha256=UTclw48vH49g6Z2ljJL5DOZ6Rl4UDudyr0PeUySa3p8,13857
22
22
  tritonparse/reproducer/ingestion/ndjson.py,sha256=pEujTl5xXW2E2DEW8ngxXQ8qP9oawb90wBVTWHDs1jk,7372
23
23
  tritonparse/reproducer/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
- tritonparse/reproducer/templates/example.py,sha256=bN75A6sDu9eG7-gePj_VAG5YINVh47rtu8neGp9kV_g,11838
24
+ tritonparse/reproducer/templates/example.py,sha256=gOYKKKptvzyI4mq4eUygi1NrIEQkChm4d1bJXed1w5U,13904
25
25
  tritonparse/reproducer/templates/loader.py,sha256=HqjfThdDVg7q2bYWry78sIaVRkUpkcA8KQDt83YrlVE,1920
26
26
  tritonparse/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  tritonparse/tools/decompress_bin_ndjson.py,sha256=kpt7DM_sSA334F1X45xdkP2OR9LuB27Pc50EkGr6CPM,4144
28
28
  tritonparse/tools/disasm.py,sha256=c4HmNNoPPeXPQBQkPVcMaHwDHbHNZNxuqXn4UIIs1Z0,2434
29
29
  tritonparse/tools/format_fix.py,sha256=Ol0Sjui8D7OzHwbamAfGnq8V5Y63uwNaFTKSORN5HkQ,3867
30
- tritonparse/tools/load_tensor.py,sha256=tfdmNVd9gsZqO6msQBhbXIhOvUzgc83yF64k2GDWPNk,2122
30
+ tritonparse/tools/load_tensor.py,sha256=94-TiSYlpXJx4MPmGK1ovmZlTt56Q_B3KQeCPaA6Cnw,2734
31
31
  tritonparse/tools/prettify_ndjson.py,sha256=r2YlHwFDTHgML7KljRmMsHaDg29q8gOQAgyDKWJhxRM,11062
32
32
  tritonparse/tools/readme.md,sha256=w6PWYfYnRgoPArLjxG9rVrpcLUkoVMGuRlbpF-o0IQM,110
33
- tritonparse-0.2.4.dev20251007071533.dist-info/licenses/LICENSE,sha256=4ZciugpyN7wcM4L-9pyDh_etvMUeIfBhDTyH1zeZlQM,1515
34
- tritonparse-0.2.4.dev20251007071533.dist-info/METADATA,sha256=WR3AE9g_zAEMhY_vBgU9yNvqv3GhYNHlwh-9tjUpP94,6580
35
- tritonparse-0.2.4.dev20251007071533.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
36
- tritonparse-0.2.4.dev20251007071533.dist-info/entry_points.txt,sha256=wEXdaieDoRRCCdhEv2p_C68iytnaXU_2pwt5CqjfbWY,56
37
- tritonparse-0.2.4.dev20251007071533.dist-info/top_level.txt,sha256=ITcTKgp3vf_bXV9vixuQU9IrZa3L1EfDSZwvRzRaoJU,12
38
- tritonparse-0.2.4.dev20251007071533.dist-info/RECORD,,
33
+ tritonparse-0.2.4.dev20251009071511.dist-info/licenses/LICENSE,sha256=4ZciugpyN7wcM4L-9pyDh_etvMUeIfBhDTyH1zeZlQM,1515
34
+ tritonparse-0.2.4.dev20251009071511.dist-info/METADATA,sha256=_kKjKy0Btt-CMFJyBS3giCWw07kTqht3JWybYpgC_p4,8250
35
+ tritonparse-0.2.4.dev20251009071511.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
36
+ tritonparse-0.2.4.dev20251009071511.dist-info/entry_points.txt,sha256=wEXdaieDoRRCCdhEv2p_C68iytnaXU_2pwt5CqjfbWY,56
37
+ tritonparse-0.2.4.dev20251009071511.dist-info/top_level.txt,sha256=ITcTKgp3vf_bXV9vixuQU9IrZa3L1EfDSZwvRzRaoJU,12
38
+ tritonparse-0.2.4.dev20251009071511.dist-info/RECORD,,