tritonparse 0.3.1.dev20251028071524__py3-none-any.whl → 0.3.1.dev20251030071508__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


tritonparse/cli.py CHANGED
@@ -68,12 +68,20 @@ def main():
         }
         unified_parse(**parse_args)
     elif args.func == "reproduce":
+        replacer = None
+        if args.use_fbcode:
+            from tritonparse.fb.reproducer.replacer import FBCodePlaceholderReplacer
+
+            replacer = FBCodePlaceholderReplacer()
+            print(f"Using FBCode placeholder replacer for template: {args.template}")
+
         reproduce(
             input_path=args.input,
             line_index=args.line - 1,  # Convert 1-based line number to 0-based index
             out_dir=args.out_dir,
             template=args.template,
             kernel_import=args.kernel_import,
+            replacer=replacer,
         )
     else:
         raise RuntimeError(f"Unknown command: {args.func}")
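
The same wiring is available programmatically; a minimal sketch (module paths inferred from the RECORD listing at the bottom of this diff; the trace path and line index are hypothetical):

    # Hypothetical usage sketch; mirrors the CLI branch above.
    from tritonparse.reproducer.orchestrator import reproduce
    from tritonparse.reproducer.placeholder_replacer import DefaultPlaceholderReplacer

    reproduce(
        input_path="trace.ndjson",  # hypothetical NDJSON trace file
        line_index=0,               # the CLI passes args.line - 1
        out_dir="repro_out",
        template="example",
        replacer=DefaultPlaceholderReplacer(),  # --use-fbcode swaps in FBCodePlaceholderReplacer
    )
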
tritonparse/ir_analysis.py CHANGED
@@ -46,20 +46,349 @@ def process_amd_gcn_bufferops(
     return process_amd_bufferop(ir_content, io_keys)


+def find_loop_bounds(ir_content: str) -> list[tuple[int, int]]:
+    """
+    Find the bounds of all scf.for loops in the IR content.
+    These are the only candidates for Software Pipelining (SWP).
+
+    A loop starts with 'scf.for' and ends when its closing brace '}' is found.
+    Brace counts are tracked to determine when each loop closes.
+
+    Args:
+        ir_content: The IR content as a string.
+
+    Returns:
+        A list of tuples (start_line, end_line) for each scf.for loop found.
+        Line numbers are 0-indexed.
+    """
+    if not ir_content:
+        return []
+
+    loop_bounds: list[tuple[int, int]] = []
+    lines = ir_content.split("\n")
+
+    # Stack to track loop starts and their brace counts
+    # Each entry is (start_line, brace_count_at_start)
+    loop_stack: list[tuple[int, int]] = []
+    current_brace_count = 0
+
+    for line_idx, line in enumerate(lines):
+        # Check if this line starts a new scf.for loop
+        if "scf.for" in line:
+            loop_stack.append((line_idx, current_brace_count))
+
+        # Count braces on this line
+        for char in line:
+            if char == "{":
+                current_brace_count += 1
+            elif char == "}":
+                current_brace_count -= 1
+
+        # Check if we've closed any loops
+        while loop_stack and current_brace_count <= loop_stack[-1][1]:
+            start_line, _start_brace_count = loop_stack.pop()
+            # The loop ends at this line
+            loop_bounds.append((start_line, line_idx))
+
+    return loop_bounds
+
+
+def find_inner_loop_bounds(ir_content: str) -> list[tuple[int, int]]:
+    """
+    Find the bounds of inner scf.for loops (loops without nested loops inside).
+
+    Inner loops are the primary candidates for Software Pipelining (SWP) as they
+    represent the innermost computation that can be optimized.
+
+    Args:
+        ir_content: The IR content as a string.
+
+    Returns:
+        A list of tuples (start_line, end_line) for each inner scf.for loop found.
+        Line numbers are 0-indexed.
+    """
+    all_loops = find_loop_bounds(ir_content)
+
+    if not all_loops:
+        return []
+
+    # Filter to keep only inner loops (loops that don't contain other loops)
+    inner_loops: list[tuple[int, int]] = []
+
+    for i, (start_i, end_i) in enumerate(all_loops):
+        # Check if any other loop is nested inside this loop
+        has_nested_loop = False
+        for j, (start_j, end_j) in enumerate(all_loops):
+            if i != j:
+                # Check if loop j is nested inside loop i
+                if start_i < start_j and end_j < end_i:
+                    has_nested_loop = True
+                    break
+
+        # If no nested loops found, this is an inner loop
+        if not has_nested_loop:
+            inner_loops.append((start_i, end_i))
+
+    return inner_loops
+
+
+def find_loop_pipelining(
+    ttir_content: str,
+    ttgir_content: str,
+    ttir_loop_start: int,
+    ttir_loop_end: int,
+    loop_index: int,
+    ttir_to_ttgir_mapping: dict[str, dict],
+    ttgir_to_source_mapping: dict[str, dict],
+    python_source_content: str | None,
+    python_source_start_line: int,
+) -> dict[str, list[str]]:
+    """
+    Find pipelining information for a specific loop by identifying tt.load and tt.dot operations
+    in TTIR and mapping them to their corresponding operations in the original Python source code.
+
+    For each tt.load or tt.dot operation found in the TTIR loop, this function uses source
+    mappings to find the corresponding operations in TTGIR, then maps them back to the original
+    Python source code. Operations are categorized into three sections:
+    - prologue: Operations that appear before the loop body
+    - loop_body: Operations that appear within the loop body
+    - epilogue: Operations that appear after the loop body
+
+    Operations are merged together (both loads and dots) and sorted in program order
+    within each section.
+
+    Args:
+        ttir_content: The TTIR content as a string.
+        ttgir_content: The TTGIR content as a string.
+        ttir_loop_start: The starting line number of the loop in TTIR (0-indexed).
+        ttir_loop_end: The ending line number of the loop in TTIR (0-indexed).
+        ttir_to_ttgir_mapping: Source mapping from TTIR lines to TTGIR lines.
+        ttgir_to_source_mapping: Source mapping from TTGIR lines to original Python source.
+        python_source_content: The original Python source code content.
+
+    Returns:
+        A dictionary containing:
+        - "prologue": List of Python source line strings in program order
+        - "loop_body": List of Python source line strings in program order
+        - "epilogue": List of Python source line strings in program order
+    """
+    if not ttir_content or not ttgir_content:
+        return {
+            "prologue": [],
+            "loop_body": [],
+            "epilogue": [],
+        }
+
+    ttir_lines = ttir_content.split("\n")
+    ttgir_lines = ttgir_content.split("\n")
+    python_lines = python_source_content.split("\n") if python_source_content else []
+
+    def apply_trailing_space(op: str) -> str:
+        """
+        Add a trailing space to all ops to avoid false positives like
+        warp_group_dot and warp_group_dot_wait.
+        """
+        return op + " "
+
+    # Step 1: Find tt.load and tt.dot operations in TTIR loop
+    ttir_pipeline_lines: list[int] = []
+    pipeline_tt_ops = ["tt.load", "tt.dot"]
+    pipeline_tt_ops = [apply_trailing_space(op) for op in pipeline_tt_ops]
+    pipeline_ttgir_ops = [
+        "tt.load",
+        "tt.dot",
+        "async_copy_global_to_local",
+        "warp_group_dot",
+    ]
+    pipeline_ttgir_ops = [apply_trailing_space(op) for op in pipeline_ttgir_ops]
+    for line_idx in range(ttir_loop_start, min(ttir_loop_end + 1, len(ttir_lines))):
+        line = ttir_lines[line_idx]
+        for op in pipeline_tt_ops:
+            if op in line:
+                ttir_pipeline_lines.append(line_idx)
+                break
+
+    # Step 2: Find the corresponding loop in TTGIR using source mappings
+    # Map the TTIR loop bounds to TTGIR using source mappings
+    ttgir_inner_loops = find_inner_loop_bounds(ttgir_content)
+
+    if not ttgir_inner_loops:
+        # No loop found in TTGIR, return empty results
+        return {
+            "prologue": [],
+            "loop_body": [],
+            "epilogue": [],
+        }
+
+    # Use the first inner loop as the reference
+    # TODO: Implement more sophisticated mapping logic to match TTIR loops to TTGIR loops
+    ttgir_loop_start, ttgir_loop_end = ttgir_inner_loops[loop_index]
+
+    # Step 3: Map TTIR operations to TTGIR operations using source mappings
+    # and categorize them by their position relative to the TTGIR loop
+    # Store as (line_number, source_line) to maintain order before extracting just the source
+    prologue_ops: list[tuple[int, str]] = []
+    loop_body_ops: list[tuple[int, str]] = []
+    epilogue_ops: list[tuple[int, str]] = []
+
+    for ttir_line in ttir_pipeline_lines:
+        # Convert 0-indexed line to 1-indexed string key for mapping lookup
+        ttir_line_key = str(ttir_line + 1)
+
+        # Get the corresponding TTGIR lines from the source mapping
+        if ttir_line_key in ttir_to_ttgir_mapping:
+            ttgir_lines_list = ttir_to_ttgir_mapping[ttir_line_key].get(
+                "ttgir_lines", []
+            )
+
+            # For each mapped TTGIR line, categorize it
+            for ttgir_line in ttgir_lines_list:
+                # Convert back to 0-indexed
+                ttgir_line_idx = ttgir_line - 1
+
+                # Get the actual TTGIR line content to check if it's relevant
+                if ttgir_line_idx < len(ttgir_lines):
+                    ttgir_source_line = ttgir_lines[ttgir_line_idx].strip()
+
+                    # Only keep mappings to the "compute" op.
+                    if any(op in ttgir_source_line for op in pipeline_ttgir_ops):
+                        # Map TTGIR line back to Python source
+                        ttgir_line_key = str(ttgir_line)
+                        python_source_line = ttgir_source_line  # Default to TTGIR line
+
+                        if ttgir_line_key in ttgir_to_source_mapping:
+                            source_info = ttgir_to_source_mapping[ttgir_line_key]
+                            python_line_num = source_info.get("line")
+
+                            if python_line_num and python_lines:
+                                # Account for the offset: the Python source may not start at line 1
+                                # python_line_num is the absolute line number in the original file
+                                # python_source_start_line is where the extracted code starts
+                                # So we need to subtract the offset to get the index in our python_lines array
+                                python_line_idx = (
+                                    python_line_num - python_source_start_line
+                                )
+                                if 0 <= python_line_idx < len(python_lines):
+                                    python_source_line = python_lines[
+                                        python_line_idx
+                                    ].strip()
+
+                        if ttgir_line_idx < ttgir_loop_start:
+                            prologue_ops.append((ttgir_line_idx, python_source_line))
+                        elif ttgir_loop_start <= ttgir_line_idx <= ttgir_loop_end:
+                            loop_body_ops.append((ttgir_line_idx, python_source_line))
+                        else:
+                            epilogue_ops.append((ttgir_line_idx, python_source_line))
+
+    # Step 4: Sort each section by line number to maintain program order
+    prologue_ops.sort(key=lambda x: x[0])
+    loop_body_ops.sort(key=lambda x: x[0])
+    epilogue_ops.sort(key=lambda x: x[0])
+
+    # Extract just the source lines (without line numbers)
+    prologue_lines = [line for _, line in prologue_ops]
+    loop_body_lines = [line for _, line in loop_body_ops]
+    epilogue_lines = [line for _, line in epilogue_ops]
+
+    # Log the pipelining results
+    logger.info(
+        f"Loop pipelining results (TTIR lines {ttir_loop_start}-{ttir_loop_end}):"
+    )
+    logger.info(f" Prologue ({len(prologue_lines)} ops):")
+    for line in prologue_lines:
+        logger.info(f" {line}")
+    logger.info(f" Loop Body ({len(loop_body_lines)} ops):")
+    for line in loop_body_lines:
+        logger.info(f" {line}")
+    logger.info(f" Epilogue ({len(epilogue_lines)} ops):")
+    for line in epilogue_lines:
+        logger.info(f" {line}")
+
+    return {
+        "prologue": prologue_lines,
+        "loop_body": loop_body_lines,
+        "epilogue": epilogue_lines,
+    }
+
+
+def generate_loop_schedule(
+    ttir_key: str,
+    ttgir_key: str,
+    file_content: dict[str, str],
+    file_path: dict[str, str],
+    source_mappings: dict[str, dict],
+    python_source_content: str | None,
+    python_source_start_line: int,
+) -> list[dict]:
+    """
+    Generate loop schedule information by finding inner scf.for loops in TTIR
+    and analyzing their pipelining potential using source mappings.
+
+    Only inner loops (loops without nested loops) are considered as they are
+    the primary candidates for Software Pipelining (SWP).
+
+    Args:
+        ttir_key: Key for the TTIR file.
+        ttgir_key: Key for the TTGIR file.
+        file_content: Dictionary mapping file keys to content.
+        file_path: Dictionary mapping file keys to file paths.
+        source_mappings: Dictionary containing source mappings between IR stages.
+        python_source_content: The original Python source code content.
+        python_source_start_line: The starting line number of the Python source in the original file.
+
+    Returns:
+        A list of dictionaries, each containing:
+        - "loop_bounds": Tuple of (start_line, end_line) for the loop in TTIR
+        - "pipelining": Dictionary with Python source lines for operations
+    """
+    ttir_content = load_ir_contents(ttir_key, file_content, file_path)
+    ttgir_content = load_ir_contents(ttgir_key, file_content, file_path)

+    # Get the TTIR to TTGIR mapping and TTGIR to source mapping
+    ttir_to_ttgir_mapping = source_mappings.get("ttir", {})
+    ttgir_to_source_mapping = source_mappings.get("ttgir", {})
+
+    # Find only inner loops (loops without nested loops inside)
+    inner_loop_bounds = find_inner_loop_bounds(ttir_content)
+    # TODO: Fix loop mapping with multiple loops.
+    inner_loop_bounds = inner_loop_bounds[:1]
+
+    # For each inner loop, find pipelining information
+    loop_schedules = []
+    for i, (loop_start, loop_end) in enumerate(inner_loop_bounds):
+        pipelining_info = find_loop_pipelining(
+            ttir_content,
+            ttgir_content,
+            loop_start,
+            loop_end,
+            i,
+            ttir_to_ttgir_mapping,
+            ttgir_to_source_mapping,
+            python_source_content,
+            python_source_start_line,
+        )
+        loop_schedules.append(pipelining_info)
+
+    return loop_schedules
+
+
 def _generate_ir_analysis(entry: str):
     payload = entry.setdefault("payload", {})
     file_content = payload.get("file_content", {})
     file_path = payload.get("file_path", {})
+    source_mappings = payload.get("source_mappings", {})

     # Find the IR file keys
+    ttir_key = next((k for k in file_content if k.endswith(".ttir")), None)
     ttgir_key = next((k for k in file_content if k.endswith(".ttgir")), None)
     amdgcn_key = next((k for k in file_content if k.endswith(".amdgcn")), None)
     # Skip if no IR files found
-    if not (ttgir_key or amdgcn_key):
-        logger.debug("No AMD IR found")
+    if not (ttir_key or ttgir_key or amdgcn_key):
+        logger.debug("No IR found")
         return {}
     ir_analysis = {}
-    if amdgcn_key:
+    if amdgcn_key and ttgir_key:
+        # Add BufferOps information
         ttgir_bufferops_info = process_amd_ttgir_bufferops(
             ttgir_key, file_content, file_path
         )
@@ -74,4 +74,25 @@ def _generate_ir_analysis(entry: str):
         io_counts["amd_gcn_bufferops_count"] = gcn_bufferops_info
         if io_counts:
             ir_analysis["io_counts"] = io_counts
+    if ttir_key and ttgir_key:
+        # Get Python source content and start line if available
+        python_source_content = None
+        python_source_start_line = 1  # Default to 1 if not available
+        python_source_info = payload.get("python_source")
+        if python_source_info:
+            python_source_content = python_source_info.get("code")
+            python_source_start_line = python_source_info.get("start_line", 1)
+
+        # Add loop schedule information
+        loop_schedule = generate_loop_schedule(
+            ttir_key,
+            ttgir_key,
+            file_content,
+            file_path,
+            source_mappings,
+            python_source_content,
+            python_source_start_line,
+        )
+        if loop_schedule:
+            ir_analysis["loop_schedules"] = loop_schedule
     return ir_analysis
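
A minimal sketch of the brace-tracking helpers above on a made-up IR fragment (assuming find_loop_bounds and find_inner_loop_bounds are imported from tritonparse.ir_analysis):

    toy_ir = "\n".join([
        "scf.for %i = %c0 to %cN step %c1 {",    # line 0: outer loop
        "  scf.for %j = %c0 to %cM step %c1 {",  # line 1: inner loop
        '    "compute"() : () -> ()',            # line 2
        "  }",                                   # line 3: closes inner
        "}",                                     # line 4: closes outer
    ])

    # Inner loops close first, so they appear first in the result.
    print(find_loop_bounds(toy_ir))        # [(1, 3), (0, 4)]
    # Only the loop with no nested scf.for survives the filter.
    print(find_inner_loop_bounds(toy_ir))  # [(1, 3)]
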
tritonparse/ir_parser.py CHANGED
@@ -44,6 +44,14 @@ ALIAS_WITH_NAME_PATTERN = re.compile(
 # Example: #loc20 = loc(#loc16)
 ALIAS_SIMPLE_PATTERN = re.compile(r"#loc(\d+)\s*=\s*loc\(\s*#loc(\d*)\s*\)")

+# Callsite loc definitions in TTIR/TTGIR
+# Example: #loc220 = loc(callsite(#loc57 at #loc190))
+# Captures: loc_id, callee_loc_id, caller_loc_id
+# Note: Uses (\d*) to match optional numbers (for bare #loc references)
+CALLSITE_PATTERN = re.compile(
+    r"#loc(\d+)\s*=\s*loc\(\s*callsite\(\s*#loc(\d*)\s+at\s+#loc(\d*)\s*\)\s*\)"
+)
+

 def extract_loc_definitions(ir_content: str) -> Dict[str, Dict[str, Any]]:
     """
@@ -141,6 +149,50 @@ def extract_loc_definitions(ir_content: str) -> Dict[str, Dict[str, Any]]:
     for alias_id, target_id in alias_map.items():
         if alias_id not in locations:
             resolve_alias(alias_id)
+
+    # Collect callsite definitions
+    callsite_defs = []
+    for i, line in enumerate(ir_content.split("\n"), start=1):
+        if m := CALLSITE_PATTERN.search(line):
+            loc_id, callee_id, caller_id = m.groups()
+            # Empty strings map to main loc key ""
+            callsite_defs.append((loc_id, callee_id or "", caller_id or "", i))
+
+    # Resolve callsite definitions
+    # A callsite inherits the location from its callee (the code being called)
+    # and stores a reference to its caller (the code doing the calling)
+    for loc_id, callee_id, caller_id, def_line in callsite_defs:
+        if loc_id not in locations:  # Avoid overwriting existing definitions
+            if callee_id in locations:
+                # Inherit location info from callee
+                callee_info = locations[callee_id]
+                locations[loc_id] = {
+                    "file": callee_info["file"],
+                    "line": callee_info["line"],
+                    "column": callee_info["column"],
+                    "def_line": def_line,
+                    "is_callsite": True,
+                    "callsite_callee": callee_id,
+                    "callsite_caller": caller_id,
+                }
+            else:
+                logger.warning(
+                    f"Callsite #loc{loc_id} references undefined callee #loc{callee_id}"
+                )
+                # Note: We don't add this callsite to locations since callee is missing
+
+    # Verify caller references (warning only, don't block)
+    for loc_id, _callee_id, caller_id, _def_line in callsite_defs:
+        if loc_id in locations and caller_id and caller_id not in locations:
+            logger.warning(
+                f"Callsite #loc{loc_id} references undefined caller #loc{caller_id}"
+            )
+
+    # Attach definition line and alias metadata
+    for k, v in def_line_map.items():
+        if k in locations:
+            locations[k]["def_line"] = v
+    for alias_id, target_id in alias_map.items():
         if alias_id in locations:
             locations[alias_id]["alias_of"] = target_id
         if alias_id in alias_name_map:
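
A quick check of what CALLSITE_PATTERN captures, on illustrative loc lines:

    import re

    CALLSITE_PATTERN = re.compile(
        r"#loc(\d+)\s*=\s*loc\(\s*callsite\(\s*#loc(\d*)\s+at\s+#loc(\d*)\s*\)\s*\)"
    )

    m = CALLSITE_PATTERN.search("#loc220 = loc(callsite(#loc57 at #loc190))")
    print(m.groups())  # ('220', '57', '190')

    # A bare "#loc" reference captures an empty string, which the collection
    # loop above normalizes to the main loc key "".
    m = CALLSITE_PATTERN.search("#loc7 = loc(callsite(#loc at #loc3))")
    print(m.groups())  # ('7', '', '3')
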
tritonparse/reproducer/cli.py CHANGED
@@ -46,3 +46,8 @@ def _add_reproducer_args(parser: argparse.ArgumentParser) -> None:
             "Defaults to 'default'."
         ),
     )
+    parser.add_argument(
+        "--use-fbcode",
+        action="store_true",
+        help=("Use fbcode to setup repro environment."),
+    )
tritonparse/reproducer/orchestrator.py CHANGED
@@ -23,7 +23,7 @@ def reproduce(
     template: str,
     replacer: Optional[PlaceholderReplacer] = None,
     kernel_import: KernelImportMode = KernelImportMode.DEFAULT,
-) -> dict[str, Path]:
+) -> dict[str, str]:
     """
     Generate a reproducer script from NDJSON trace file.

@@ -45,7 +45,7 @@ def reproduce(
         f"Built context bundle for kernel: {context_bundle.kernel_info.function_name}"
     )
     out_py_path, temp_json_path = determine_output_paths(
-        out_dir, context_bundle.kernel_info.function_name
+        out_dir, context_bundle.kernel_info.function_name, template
    )
     save_prettified_json(context_bundle.raw_launch_event, temp_json_path)
tritonparse/reproducer/placeholder_replacer.py CHANGED
@@ -76,21 +76,39 @@ class DefaultPlaceholderReplacer(PlaceholderReplacer):
     - # {{KERNEL_INVOCATION_PLACEHOLDER}}: Replaced with kernel invocation code
     """

+    KERNEL_NAME_PLACEHOLDER = "{{KERNEL_NAME_PLACEHOLDER}}"
+    JSON_FILE_NAME_PLACEHOLDER = "{{JSON_FILE_NAME_PLACEHOLDER}}"
+    IR_OVERRIDE_SETUP_PLACEHOLDER = "# {{IR_OVERRIDE_SETUP_PLACEHOLDER}}"
+    KERNEL_SYSPATH_PLACEHOLDER = "# {{KERNEL_SYSPATH_PLACEHOLDER}}"
+    KERNEL_IMPORT_PLACEHOLDER = "# {{KERNEL_IMPORT_PLACEHOLDER}}"
+    UTILITY_FUNCTIONS_PLACEHOLDER = "# {{UTILITY_FUNCTIONS_PLACEHOLDER}}"
+    KERNEL_INVOCATION_PLACEHOLDER = "# {{KERNEL_INVOCATION_PLACEHOLDER}}"
+
     def __init__(self):
         super().__init__()
         # Register all default handlers
-        self.register("{{JSON_FILE_NAME_PLACEHOLDER}}", self._replace_json_filename)
+        self.register(self.JSON_FILE_NAME_PLACEHOLDER, self._replace_json_filename)
         self.register(
-            "# {{IR_OVERRIDE_SETUP_PLACEHOLDER}}", self._replace_ir_override_setup
+            self.IR_OVERRIDE_SETUP_PLACEHOLDER, self._replace_ir_override_setup
         )
-        self.register("# {{KERNEL_SYSPATH_PLACEHOLDER}}", self._replace_kernel_syspath)
-        self.register("# {{KERNEL_IMPORT_PLACEHOLDER}}", self._replace_kernel_import)
+        self.register(self.KERNEL_SYSPATH_PLACEHOLDER, self._replace_kernel_syspath)
+        self.register(self.KERNEL_IMPORT_PLACEHOLDER, self._replace_kernel_import)
         self.register(
-            "# {{UTILITY_FUNCTIONS_PLACEHOLDER}}", self._replace_utility_functions
+            self.UTILITY_FUNCTIONS_PLACEHOLDER, self._replace_utility_functions
         )
         self.register(
-            "# {{KERNEL_INVOCATION_PLACEHOLDER}}", self._replace_kernel_invocation
+            self.KERNEL_INVOCATION_PLACEHOLDER, self._replace_kernel_invocation
         )
+        self.register(self.KERNEL_NAME_PLACEHOLDER, self._replace_kernel_name)
+
+    def _replace_kernel_name(
+        self, code: str, context_bundle: ContextBundle, **kwargs
+    ) -> str:
+        """Replace the kernel name placeholder."""
+        kernel_name = context_bundle.kernel_info.function_name
+        if not kernel_name:
+            raise ValueError("Kernel function name is not available")
+        return code.replace(self.KERNEL_NAME_PLACEHOLDER, kernel_name)

     def _replace_json_filename(
         self, code: str, context_bundle: ContextBundle, **kwargs
@@ -99,7 +117,7 @@ class DefaultPlaceholderReplacer(PlaceholderReplacer):
         temp_json_path = kwargs.get("temp_json_path")
         if temp_json_path is None:
             raise ValueError("temp_json_path is required for JSON filename replacement")
-        return code.replace("{{JSON_FILE_NAME_PLACEHOLDER}}", temp_json_path.name)
+        return code.replace(self.JSON_FILE_NAME_PLACEHOLDER, temp_json_path.name)

     def _replace_ir_override_setup(
         self, code: str, context_bundle: ContextBundle, **kwargs
@@ -108,7 +126,7 @@ class DefaultPlaceholderReplacer(PlaceholderReplacer):
         kernel_import = kwargs.get("kernel_import", KernelImportMode.DEFAULT)

         if kernel_import != KernelImportMode.OVERRIDE_TTIR:
-            return code.replace("# {{IR_OVERRIDE_SETUP_PLACEHOLDER}}", "")
+            return code.replace(self.IR_OVERRIDE_SETUP_PLACEHOLDER, "")

         comp_json_filename = kwargs.get("comp_json_filename")
         if not comp_json_filename:
@@ -158,7 +176,7 @@ _original_autotune = triton.autotune
 triton.autotune = _patched_autotune
 '''

-        return code.replace("# {{IR_OVERRIDE_SETUP_PLACEHOLDER}}", setup_code)
+        return code.replace(self.IR_OVERRIDE_SETUP_PLACEHOLDER, setup_code)

     def _replace_kernel_syspath(
         self, code: str, context_bundle: ContextBundle, **kwargs
@@ -168,15 +186,15 @@ triton.autotune = _patched_autotune

         if kernel_import == KernelImportMode.DEFAULT:
             sys_stmt, _ = _generate_import_statements(context_bundle.kernel_info)
-            return code.replace("# {{KERNEL_SYSPATH_PLACEHOLDER}}", sys_stmt)
+            return code.replace(self.KERNEL_SYSPATH_PLACEHOLDER, sys_stmt)
         elif kernel_import == KernelImportMode.COPY:
             comment = (
                 "# Kernel sys.path setup skipped - kernel source code embedded below"
             )
-            return code.replace("# {{KERNEL_SYSPATH_PLACEHOLDER}}", comment)
+            return code.replace(self.KERNEL_SYSPATH_PLACEHOLDER, comment)
         elif kernel_import == KernelImportMode.OVERRIDE_TTIR:
             comment = "# Kernel sys.path setup skipped - using IR override mode"
-            return code.replace("# {{KERNEL_SYSPATH_PLACEHOLDER}}", comment)
+            return code.replace(self.KERNEL_SYSPATH_PLACEHOLDER, comment)
         else:
             raise ValueError(f"Unknown kernel_import mode: {kernel_import}")

@@ -190,7 +208,7 @@ triton.autotune = _patched_autotune
             _, import_statement = _generate_import_statements(
                 context_bundle.kernel_info
             )
-            return code.replace("# {{KERNEL_IMPORT_PLACEHOLDER}}", import_statement)
+            return code.replace(self.KERNEL_IMPORT_PLACEHOLDER, import_statement)
         elif kernel_import == KernelImportMode.COPY:
             source_code = context_bundle.kernel_info.source_code
             func_name = context_bundle.kernel_info.function_name
@@ -216,10 +234,10 @@ triton.autotune = _patched_autotune
             embedded_code += "\n" + source_code
             embedded_code += f"\n\n# Use kernel function directly\nimported_kernel_function = {func_name}"

-            return code.replace("# {{KERNEL_IMPORT_PLACEHOLDER}}", embedded_code)
+            return code.replace(self.KERNEL_IMPORT_PLACEHOLDER, embedded_code)
         elif kernel_import == KernelImportMode.OVERRIDE_TTIR:
             comment = "# Kernel import skipped - using IR override mode with TTIR"
-            return code.replace("# {{KERNEL_IMPORT_PLACEHOLDER}}", comment)
+            return code.replace(self.KERNEL_IMPORT_PLACEHOLDER, comment)
         else:
             raise ValueError(f"Unknown kernel_import mode: {kernel_import}")

@@ -228,7 +246,7 @@ triton.autotune = _patched_autotune
     ) -> str:
         """Replace the utility functions placeholder with extracted functions."""
         utility_code = extract_utility_functions()
-        return code.replace("# {{UTILITY_FUNCTIONS_PLACEHOLDER}}", utility_code)
+        return code.replace(self.UTILITY_FUNCTIONS_PLACEHOLDER, utility_code)

     def _replace_kernel_invocation(
         self, code: str, context_bundle: ContextBundle, **kwargs
@@ -237,4 +255,4 @@ triton.autotune = _patched_autotune
         source_code = context_bundle.kernel_info.source_code
         pos_args, kw_args = _parse_kernel_signature(source_code)
         invocation_snippet = _generate_invocation_snippet(pos_args, kw_args)
-        return code.replace("# {{KERNEL_INVOCATION_PLACEHOLDER}}", invocation_snippet)
+        return code.replace(self.KERNEL_INVOCATION_PLACEHOLDER, invocation_snippet)
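
The class-level constants make the replacement mechanics easy to demo in isolation; a toy sketch (template text and kernel name invented):

    template_text = 'KERNEL_NAME = "{{KERNEL_NAME_PLACEHOLDER}}"'
    placeholder = "{{KERNEL_NAME_PLACEHOLDER}}"

    # Same str.replace mechanics used by _replace_kernel_name above.
    print(template_text.replace(placeholder, "my_triton_kernel"))
    # KERNEL_NAME = "my_triton_kernel"
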
tritonparse/reproducer/templates/example.py CHANGED
@@ -14,7 +14,7 @@ import torch
 # {{UTILITY_FUNCTIONS_PLACEHOLDER}}


-if __name__ == "__main__":
+def launch_kernel():
     script_dir = Path(__file__).resolve().parent  # noqa: F821
     json_file = script_dir / "{{JSON_FILE_NAME_PLACEHOLDER}}"
     grid, args_dict = create_args_from_json_file(str(json_file))  # noqa: F821
@@ -28,3 +28,7 @@ if __name__ == "__main__":

     torch.cuda.synchronize()
     print("Kernel execution finished.")
+
+
+if __name__ == "__main__":
+    launch_kernel()
tritonparse/reproducer/templates/tritonbench.py ADDED
@@ -0,0 +1,103 @@
+# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional, Tuple
+
+import torch
+from tritonbench.utils.triton_op import (
+    BenchmarkOperator,
+    register_benchmark,
+    REGISTERED_X_VALS,
+)
+
+
+imported_kernel_function: Optional[Callable[[Tuple[int], Dict[str, Any]], None]] = None
+
+# {{IR_OVERRIDE_SETUP_PLACEHOLDER}}
+
+# {{KERNEL_SYSPATH_PLACEHOLDER}}
+
+# {{KERNEL_IMPORT_PLACEHOLDER}}
+
+# {{UTILITY_FUNCTIONS_PLACEHOLDER}}
+
+assert imported_kernel_function is not None, "imported_kernel_function is missing"
+
+KERNEL_NAME = "{{KERNEL_NAME_PLACEHOLDER}}"
+REPRO_CONTEXT_FILE_NAME = "{{JSON_FILE_NAME_PLACEHOLDER}}"
+
+
+def _get_launch_kernel_args() -> Tuple[Tuple[int], Dict[str, Any]]:
+    script_dir = Path(__file__).resolve().parent  # noqa: F821
+    json_file = script_dir / REPRO_CONTEXT_FILE_NAME
+
+    grid, args_dict = create_args_from_json_file(json_file)  # noqa: F821, F841
+
+    print("Recorded kernel arguments dictionary:")
+    for name, arg in args_dict.items():
+        if isinstance(arg, torch.Tensor):
+            print(
+                f" {name}: Tensor: {arg.shape} {arg.dtype} stride: {arg.stride()}, is_contiguous: {arg.is_contiguous()}"
+            )
+        else:
+            print(f" {name}: {arg}")
+    print(f"Grid: {grid}")
+
+    return tuple(grid), args_dict
+
+
+grid, args_dict = _get_launch_kernel_args()
+
+
+def _launch_kernel(grid: tuple[int], args_dict: dict[str, Any]):
+    try:
+        assert grid is not None
+        assert args_dict is not None
+
+        # {{KERNEL_INVOCATION_PLACEHOLDER}}
+
+    except Exception as e:
+        print(f"Error: {e}")
+        print("Failed to launch kernel!")
+
+
+# HACK: @register_x_val doesn't allow us to pass `operator_name` as a parameter
+tensor_args = {k: v for k, v in args_dict.items() if isinstance(v, torch.Tensor)}
+x_vals_label = ", ".join(tensor_args.keys())
+REGISTERED_X_VALS[KERNEL_NAME] = x_vals_label
+
+
+class Operator(BenchmarkOperator):
+    @register_benchmark(operator_name=KERNEL_NAME)
+    def run_kernel(self, grid, args_dict):
+        return lambda: _launch_kernel(grid, args_dict)
+
+    def get_input_iter(self):
+        yield {"grid": grid, "args_dict": args_dict}
+
+    def get_x_val(self, example_inputs):
+        tensors_shapes = [
+            tuple(v.shape)
+            for v in example_inputs["args_dict"].values()
+            if isinstance(v, torch.Tensor)
+        ]
+        return tuple(tensors_shapes)
+
+
+if __name__ == "__main__":
+    print("do_benchmark...")
+
+    args = [
+        "--benchmark-name",
+        KERNEL_NAME,
+    ]
+
+    from tritonbench.utils.parser import get_parser
+
+    parser = get_parser(args)
+    tb_args, extra_args = parser.parse_known_args(args)
+    bench = Operator(tb_args, extra_args)
+    bench.run()
+
+    print(bench.output)
+    print("Benchmark completed successfully!")
tritonparse/reproducer/utils.py CHANGED
@@ -327,7 +327,7 @@ def _create_arg_from_info(arg_info):
     return None


-def determine_output_paths(out_dir: str, kernel_name: str):
+def determine_output_paths(out_dir: str, kernel_name: str, template: str):
     """
     Determine output file paths for reproducer script and context data.

@@ -342,7 +342,12 @@ def determine_output_paths(out_dir: str, kernel_name: str):
     output_directory = Path(out_dir) / kernel_name
     output_directory.mkdir(parents=True, exist_ok=True)

-    out_py_path = output_directory / f"repro_{timestamp}.py"
+    filename_parts = ["repro"]
+    if template != "example":
+        filename_parts.append(template.replace(".", "_"))
+    filename_parts.append(timestamp)
+    filename = "_".join(filename_parts) + ".py"
+    out_py_path = output_directory / filename
     temp_json_path = output_directory / f"repro_context_{timestamp}.json"

     return out_py_path, temp_json_path
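
A sketch of the script names this logic produces (the timestamp value is made up):

    # template == "example" keeps the original naming: repro_<timestamp>.py
    # any other template name is embedded, with dots mapped to underscores.
    filename_parts = ["repro"]
    template = "tritonbench"
    timestamp = "20251030_120000"  # made-up value
    if template != "example":
        filename_parts.append(template.replace(".", "_"))
    filename_parts.append(timestamp)
    print("_".join(filename_parts) + ".py")  # repro_tritonbench_20251030_120000.py
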
tritonparse/trace_processor.py CHANGED
@@ -77,6 +77,11 @@ def generate_source_mappings(
             "column": info["column"],
             f"{ir_type}_line": ln,
         }
+        # Propagate callsite metadata if present
+        if info.get("is_callsite"):
+            entry["is_callsite"] = True
+            entry["callsite_callee"] = info["callsite_callee"]
+            entry["callsite_caller"] = info["callsite_caller"]
         # Propagate alias metadata if present
         if "alias_name" in info:
             entry["alias_name"] = info["alias_name"]
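
With this propagation, a mapping entry for a callsite loc carries both the inherited source location and the call-chain references; an illustrative entry (all values invented):

    entry = {
        "file": "kernel.py",
        "line": 42,
        "column": 8,
        "ttgir_line": 317,
        # Present only when the loc came from a callsite(...) definition:
        "is_callsite": True,
        "callsite_callee": "57",
        "callsite_caller": "190",
    }
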
tritonparse-0.3.1.dev20251030071508.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tritonparse
-Version: 0.3.1.dev20251028071524
+Version: 0.3.1.dev20251030071508
 Summary: TritonParse: A Compiler Tracer, Visualizer, and mini-Reproducer Generator for Triton Kernels
 Author-email: Yueming Hao <yhao@meta.com>
 License-Expression: BSD-3-Clause
tritonparse-0.3.1.dev20251030071508.dist-info/RECORD CHANGED
@@ -1,31 +1,32 @@
 tritonparse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tritonparse/__main__.py,sha256=RXbkALBewcb1xlJBnsQl9IaBRUNln7U8NuRZKT8UdIk,117
-tritonparse/cli.py,sha256=Z3nz_rGYXF6NKmI3LsTomWqM51sJ8Tp-ybxRq7l20BI,2569
+tritonparse/cli.py,sha256=JqBwzpxiFKb0TFdhovDXnz3gwkjeASYgIe311GRBy0o,2876
 tritonparse/common.py,sha256=MJo9bVCgSKkwXpEoUkUczPo_5jOYpJgXLq4UsWYqN3c,13924
 tritonparse/context_manager.py,sha256=OdMn11qbApYL2c9IlbUpcT27r04ZSa4DfvrY2mLA958,2243
 tritonparse/event_diff.py,sha256=USCjfjYr-7Ie-EfZgtCFMZMA1KRzFRDe7yDFy98zYI4,4962
 tritonparse/extract_source_mappings.py,sha256=Z6UxFj2cCE5NCWLQTYPKqUpLfbYhqP8xgCl5mvud9KI,1451
-tritonparse/ir_analysis.py,sha256=MoOXuHsUGZ705R4JnXmlsrBn9gJdLO1Dnf0L5AxcaBM,2551
-tritonparse/ir_parser.py,sha256=MH4RwoNZMBdWUxkFyEhemJ7Aa7-asoba66b06bGPNsk,13237
+tritonparse/ir_analysis.py,sha256=DZz9H8DqW753UkYECnyt6ATC6J1yvLxOHVRHHAZbrVg,16627
+tritonparse/ir_parser.py,sha256=JQ7hsevmhFGmtZ3CoXi4utcomAycBQTT-KFjSva2K8U,15565
 tritonparse/mapper.py,sha256=QBCUMHM9pu3x3ahFp0wyXJbmv9TFGVPdkcLULok1E-k,4205
 tritonparse/shared_vars.py,sha256=RifXq55KisHgspYAmGcaCWY6ZHX8iejFHvwIewvcWZE,707
 tritonparse/source_type.py,sha256=nmYEQS8rfkIN9BhNhQbkmEvKnvS-3zAxRGLY4TaZdi8,1676
 tritonparse/sourcemap_utils.py,sha256=uI02n5Sgnlx7Nc15QAX5N6_tZZMips0PyJuo1n3eouY,2654
 tritonparse/structured_logging.py,sha256=L1xkkCx8Jr9YQbM0Kgtf2g6L3aWMkYOEeFFEOSo8Lkk,60306
 tritonparse/tp_logger.py,sha256=vXzY7hMDmVnRBGBhIjFZe3nHZzG5NKKPONGUszJhGgU,242
-tritonparse/trace_processor.py,sha256=aQPqlnpTtWoGzHYv4BXWUH4nCeUQGSK3o-fj0LD9I0c,14147
+tritonparse/trace_processor.py,sha256=AW4YDrPDayURtmePkFi5m5p6P7OTi1UlTPbbrPzujwY,14418
 tritonparse/utils.py,sha256=Jnlptcd79llSDev-_1XyyOnv2izUqv0PEL74A8GF2tc,4565
 tritonparse/reproducer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tritonparse/reproducer/cli.py,sha256=wk0K8qJhvP9gty2EBMH3WEc3TSFcszvNq3JbfYu_sTw,1577
+tritonparse/reproducer/cli.py,sha256=95AgH9QOlSFpkC5iR89XV9wubv_5vfD1MKl2IxpAIzs,1718
 tritonparse/reproducer/function_extractor.py,sha256=kQr10JKHy8EvAN7ic4Azjz6TYe-udBW2DVmbQ--c1pc,6643
-tritonparse/reproducer/orchestrator.py,sha256=GotBOJjrShN1oCFc_xTMXn8WWT1Jlfap5qcM21dKBpM,3259
-tritonparse/reproducer/placeholder_replacer.py,sha256=ARPZAa9A3Fyit_dIclOKe1JzFgUPBFdHvfy3z20x2E8,9607
+tritonparse/reproducer/orchestrator.py,sha256=OO-eeT4iN-QcB6uXMfH-VoMmiYHJUtrQDQnfneWkuAM,3268
+tritonparse/reproducer/placeholder_replacer.py,sha256=_ehcve5V8_TwemE0NftoO97gZpf4i-n626juAIrixOE,10515
 tritonparse/reproducer/types.py,sha256=86wql3NaGgpkOzx0gDFb5qexNjKExzhL0uIwGU7grrw,564
-tritonparse/reproducer/utils.py,sha256=yFS1Mg2IhRgW-1UNfqjWH5gRSqc8Wbn5Ykre8L-EWcU,16599
+tritonparse/reproducer/utils.py,sha256=DsO7695AuGaFOp4sRSCmsljBeyKnQud9NOKntaUL_VE,16803
 tritonparse/reproducer/ingestion/ndjson.py,sha256=7amSwpbtG-od1-pW18Nm9AiaFc3Etd0-UETXwiYCmgw,7443
 tritonparse/reproducer/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tritonparse/reproducer/templates/example.py,sha256=jR3c8_d7fAFJYaj1DuUuthnI4Xd-_606bWDRdUPMNyo,785
+tritonparse/reproducer/templates/example.py,sha256=mTK_H4BfHntFdk9bybMEYSx8TyKXzQDwMxZok0Urw5s,828
 tritonparse/reproducer/templates/loader.py,sha256=x14KHXkovOIcXFKii3Jx4XjpEhXqUMqp575qAffi370,1975
+tritonparse/reproducer/templates/tritonbench.py,sha256=vRQ9xvIF3pgPHN2nGVBay6ngXScVdicU3agCV3f9Ao0,2875
 tritonparse/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tritonparse/tools/decompress_bin_ndjson.py,sha256=Gn5foDIlxBN5D5wmcdrEmwvxo3_wRlH8ih2U2Ys3RdM,4199
 tritonparse/tools/disasm.py,sha256=c4HmNNoPPeXPQBQkPVcMaHwDHbHNZNxuqXn4UIIs1Z0,2434
@@ -33,9 +34,9 @@ tritonparse/tools/format_fix.py,sha256=ISalg_N_L7Xktag3mLr-G9T6Opxv793s1WG6A9wUt
 tritonparse/tools/load_tensor.py,sha256=7-LbpboKDNJFBLNhiKS3enoqRlVAb55OjPc70PwHXAw,2789
 tritonparse/tools/prettify_ndjson.py,sha256=kR8hmBCv-iJeuzpi2_6CZv9T4_edRQbBOSOPpMm6wrw,11117
 tritonparse/tools/readme.md,sha256=w6PWYfYnRgoPArLjxG9rVrpcLUkoVMGuRlbpF-o0IQM,110
-tritonparse-0.3.1.dev20251028071524.dist-info/licenses/LICENSE,sha256=4ZciugpyN7wcM4L-9pyDh_etvMUeIfBhDTyH1zeZlQM,1515
-tritonparse-0.3.1.dev20251028071524.dist-info/METADATA,sha256=6PkpYMi1Qjf4Lar46WHsMPBo6dQts6i6n3IcwmijYeg,8282
-tritonparse-0.3.1.dev20251028071524.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-tritonparse-0.3.1.dev20251028071524.dist-info/entry_points.txt,sha256=wEXdaieDoRRCCdhEv2p_C68iytnaXU_2pwt5CqjfbWY,56
-tritonparse-0.3.1.dev20251028071524.dist-info/top_level.txt,sha256=ITcTKgp3vf_bXV9vixuQU9IrZa3L1EfDSZwvRzRaoJU,12
-tritonparse-0.3.1.dev20251028071524.dist-info/RECORD,,
+tritonparse-0.3.1.dev20251030071508.dist-info/licenses/LICENSE,sha256=4ZciugpyN7wcM4L-9pyDh_etvMUeIfBhDTyH1zeZlQM,1515
+tritonparse-0.3.1.dev20251030071508.dist-info/METADATA,sha256=GgewTGhlYm2vvn7S39qg5UiXAzJ07JzkETek6YJnwSw,8282
+tritonparse-0.3.1.dev20251030071508.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tritonparse-0.3.1.dev20251030071508.dist-info/entry_points.txt,sha256=wEXdaieDoRRCCdhEv2p_C68iytnaXU_2pwt5CqjfbWY,56
+tritonparse-0.3.1.dev20251030071508.dist-info/top_level.txt,sha256=ITcTKgp3vf_bXV9vixuQU9IrZa3L1EfDSZwvRzRaoJU,12
+tritonparse-0.3.1.dev20251030071508.dist-info/RECORD,,