PyPI - tritonparse - Versions diffs - 0.3.2.dev20251210071601__py3-none-any.whl - Mend

tritonparse 0.3.2.dev20251210071601__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tritonparse might be problematic. Click here for more details.

Files changed (62) hide show

tritonparse/__init__.py +0 -0
tritonparse/__main__.py +7 -0
tritonparse/cli.py +110 -0
tritonparse/common.py +409 -0
tritonparse/context_manager.py +64 -0
tritonparse/event_diff.py +122 -0
tritonparse/extract_source_mappings.py +49 -0
tritonparse/info/__init__.py +30 -0
tritonparse/info/cli.py +121 -0
tritonparse/info/kernel_query.py +209 -0
tritonparse/info/parse_helper.py +70 -0
tritonparse/ir_analysis.py +427 -0
tritonparse/ir_parser.py +365 -0
tritonparse/mapper.py +102 -0
tritonparse/reproducer/__init__.py +0 -0
tritonparse/reproducer/ast_analyzer.py +636 -0
tritonparse/reproducer/cli.py +72 -0
tritonparse/reproducer/consolidated_result.py +52 -0
tritonparse/reproducer/function_extractor.py +228 -0
tritonparse/reproducer/import_info.py +25 -0
tritonparse/reproducer/import_parser.py +178 -0
tritonparse/reproducer/import_resolver.py +151 -0
tritonparse/reproducer/ingestion/ndjson.py +237 -0
tritonparse/reproducer/multi_file_analyzer.py +824 -0
tritonparse/reproducer/orchestrator.py +110 -0
tritonparse/reproducer/placeholder_replacer.py +335 -0
tritonparse/reproducer/templates/__init__.py +0 -0
tritonparse/reproducer/templates/example.py +38 -0
tritonparse/reproducer/templates/loader.py +59 -0
tritonparse/reproducer/templates/tritonbench.py +106 -0
tritonparse/reproducer/templates/utils.py +48 -0
tritonparse/reproducer/tests/__init__.py +0 -0
tritonparse/reproducer/tests/artifacts/__init__.py +5 -0
tritonparse/reproducer/tests/artifacts/triton_fused_kernel.py +65 -0
tritonparse/reproducer/tests/artifacts/triton_preprocess.py +16 -0
tritonparse/reproducer/tests/artifacts/triton_utils.py +14 -0
tritonparse/reproducer/tests/test_import_parser.py +164 -0
tritonparse/reproducer/tests/test_import_resolver.py +88 -0
tritonparse/reproducer/tests/test_multi_file_analyzer.py +118 -0
tritonparse/reproducer/types.py +20 -0
tritonparse/reproducer/utils.py +580 -0
tritonparse/shared_vars.py +12 -0
tritonparse/source_type.py +56 -0
tritonparse/sourcemap_utils.py +96 -0
tritonparse/structured_logging.py +1634 -0
tritonparse/tools/__init__.py +0 -0
tritonparse/tools/decompress_bin_ndjson.py +120 -0
tritonparse/tools/disasm.py +81 -0
tritonparse/tools/extract_irs.py +244 -0
tritonparse/tools/format_fix.py +151 -0
tritonparse/tools/load_tensor.py +76 -0
tritonparse/tools/prettify_ndjson.py +334 -0
tritonparse/tools/readme.md +37 -0
tritonparse/tp_logger.py +9 -0
tritonparse/trace_processor.py +367 -0
tritonparse/utils.py +155 -0
tritonparse-0.3.2.dev20251210071601.dist-info/METADATA +195 -0
tritonparse-0.3.2.dev20251210071601.dist-info/RECORD +62 -0
tritonparse-0.3.2.dev20251210071601.dist-info/WHEEL +5 -0
tritonparse-0.3.2.dev20251210071601.dist-info/entry_points.txt +2 -0
tritonparse-0.3.2.dev20251210071601.dist-info/licenses/LICENSE +29 -0
tritonparse-0.3.2.dev20251210071601.dist-info/top_level.txt +1 -0

tritonparse/ir_parser.py ADDED Viewed

@@ -0,0 +1,365 @@
+#  Copyright (c) Meta Platforms, Inc. and affiliates.
+import logging
+import os
+import re
+from collections import defaultdict
+from typing import Any, Dict, List
+# Logger used by the source-mapping helpers in this module.
+logger = logging.getLogger("SourceMapping")
+# Definition of a numbered #loc directive. These appear at the bottom of the IR files.
+# Example: #loc2 = loc("/tmp/torchinductor_yhao/yp/abcdef.py":20:28)
+# Captures: loc_id, file path, line, column.
+# Note: (\d+) requires at least one digit, so this matches #loc1, #loc2, ... but
+# never the bare #loc.
+LOC_PATTERN = re.compile(r'#loc(\d+) = loc\("([^"]+)":(\d+):(\d+)\)')
+# Reference to a #loc directive. These appear at the end of lines of the IR files.
+# Example: loc(#loc2)
+# Captures: loc_id. (\d*) also accepts the bare form loc(#loc), in which case the
+# captured id is the empty string.
+CODE_LOC_PATTERN = re.compile(r".*loc\(#loc(\d*)\)\s*$")
+# Inline file location, loc("file":line:col). This form is used in the first
+# function arguments line.
+# Captures: file path, line, column. Because the leading ".*" is greedy, a line that
+# holds several inline locations yields the last one.
+DIRECT_FILE_PATTERN = re.compile(r'.*loc\("([^"]+)":(\d+):(\d+)\)')
+# Definition of the PTX .loc directive.
+# Example: .loc 1 0 50 // abcdef.py:0:50
+# Captures: line and column from the directive itself (the file index after ".loc" is
+# matched but not captured), then file name, line and column from the trailing comment.
+PTX_LOC_PATTERN = re.compile(
+    r"^\s*\.loc\s+\d+\s+(\d+)\s+(\d+)\s+//\s*(.+?):(\d+):(\d+)"
+)
+# Definition of the AMDGCN .loc directive.
+# Example: .loc	1 32 30                         ; abcd.py:32:30
+# .loc	1 32 46 is_stmt 0               ; abcd.py:32:46
+# Captures: file index, line and column from the directive, then file name, line and
+# column from the trailing comment. Extra tokens before the ";" (such as "is_stmt 0")
+# are skipped by the optional non-capturing group.
+AMDGCN_LOC_PATTERN = re.compile(
+    r".*loc\s+(\d+)\s+(\d+)\s+(\d+)(?:\s+[^;]*)?;\s*(.+?):(\d+):(\d+)"
+)
+# Named alias loc definitions in TTGIR/TTIR.
+# Example: #loc16 = loc("pid"(#loc2))
+# Example: #loc13 = loc("x_ptr"(#loc)) - bare #loc without number
+# Captures: alias_id, name, target_id (empty string when the target is the bare #loc).
+ALIAS_WITH_NAME_PATTERN = re.compile(
+    r'#loc(\d+)\s*=\s*loc\("([^"]+)"\s*\(\s*#loc(\d*)\s*\)\s*\)'
+)
+# Unnamed alias loc definitions.
+# Example: #loc20 = loc(#loc16)
+# Captures: alias_id, target_id (empty string when the target is the bare #loc).
+ALIAS_SIMPLE_PATTERN = re.compile(r"#loc(\d+)\s*=\s*loc\(\s*#loc(\d*)\s*\)")
+# Callsite loc definitions in TTIR/TTGIR.
+# Example: #loc220 = loc(callsite(#loc57 at #loc190))
+# Captures: loc_id, callee_loc_id, caller_loc_id
+# Note: Uses (\d*) to match optional numbers (for bare #loc references)
+CALLSITE_PATTERN = re.compile(
+    r"#loc(\d+)\s*=\s*loc\(\s*callsite\(\s*#loc(\d*)\s+at\s+#loc(\d*)\s*\)\s*\)"
+)
+def extract_loc_definitions(ir_content: str) -> Dict[str, Dict[str, Any]]:
+    """
+    Extracts location definitions from the given IR content.
+    This function searches for #loc directives in the provided IR content string.
+    It identifies the main #loc directive, which is a special case located at the top
+    of the IR files, and any subsequent #loc directives that define source file locations.
+    Args:
+        ir_content (str): The content of the IR file as a string.
+    Returns:
+        Dict[str, Dict[str, Any]]: A dictionary mapping location IDs to their corresponding
+        file names, line numbers, and column numbers.
+    """
+    locations = {}
+    # The first #loc directive is a special case. It locates at the top of the IR files
+    # Store it with empty string "" as key to avoid conflict with #loc1
+    main_match = re.search(r'#loc = loc\("([^"]+)":(\d+):(\d+)\)', ir_content)
+    if main_match:
+        locations[""] = {
+            "file": main_match.group(1),
+            "line": int(main_match.group(2)),
+            "column": int(main_match.group(3)),
+        }
+    # #loc1 = loc(unknown) is another special case. We ignore it.
+    for loc_id, filename, line, col in LOC_PATTERN.findall(ir_content):
+        key = loc_id
+        locations[key] = {"file": filename, "line": int(line), "column": int(col)}
+    # Handle alias-style loc definitions that reference another #loc
+    # Build alias map first: alias_id -> target_id
+    alias_map: Dict[str, str] = {}
+    for m in ALIAS_WITH_NAME_PATTERN.finditer(ir_content):
+        alias_id, _name, target_id = m.groups()
+        # Empty target_id means bare #loc, map to "" (main loc key)
+        alias_map[alias_id] = target_id or ""
+    for m in ALIAS_SIMPLE_PATTERN.finditer(ir_content):
+        alias_id, target_id = m.groups()
+        # Empty target_id means bare #loc, map to "" (main loc key)
+        alias_map[alias_id] = target_id or ""
+    # Build definition line map and alias name map by scanning lines
+    def_line_map: Dict[str, int] = {}
+    alias_name_map: Dict[str, str] = {}
+    main_loc_line: int = 0
+    for i, line in enumerate(ir_content.split("\n"), start=1):
+        if m := ALIAS_WITH_NAME_PATTERN.search(line):
+            alias_id, name, target_id = m.groups()
+            def_line_map[alias_id] = i
+            alias_name_map[alias_id] = name
+            # ensure alias map is populated even if only found in line scan
+            # Empty target_id means bare #loc, map to "" (main loc key)
+            alias_map.setdefault(alias_id, target_id or "")
+        elif m := ALIAS_SIMPLE_PATTERN.search(line):
+            alias_id, target_id = m.groups()
+            def_line_map[alias_id] = i
+            # Empty target_id means bare #loc, map to "" (main loc key)
+            alias_map.setdefault(alias_id, target_id or "")
+        if m2 := LOC_PATTERN.search(line):
+            base_id, _fn, _ln, _col = m2.groups()
+            def_line_map[base_id] = i
+        if re.search(r'#loc\s*=\s*loc\("[^"]+":\d+:\d+\)', line):
+            # main #loc = loc("file":line:col) without id
+            main_loc_line = main_loc_line or i
+    # Resolve aliases to base locations (file/line/column)
+    resolving_stack = set()
+    def resolve_alias(current_id: str) -> Dict[str, Any]:
+        # Already a concrete location
+        if current_id in locations:
+            return locations[current_id]
+        # Detect cycles
+        if current_id in resolving_stack:
+            return {}
+        resolving_stack.add(current_id)
+        parent_id = alias_map.get(current_id)
+        result: Dict[str, Any] = {}
+        if parent_id is not None:
+            base = resolve_alias(parent_id)
+            if base:
+                # copy to avoid sharing the same dict by reference
+                result = {
+                    "file": base.get("file"),
+                    "line": base.get("line"),
+                    "column": base.get("column"),
+                }
+                locations[current_id] = result
+        resolving_stack.remove(current_id)
+        return result
+    # Resolve aliases and attach alias metadata
+    for alias_id, target_id in alias_map.items():
+        if alias_id not in locations:
+            resolve_alias(alias_id)
+    # Collect callsite definitions
+    callsite_defs = []
+    for i, line in enumerate(ir_content.split("\n"), start=1):
+        if m := CALLSITE_PATTERN.search(line):
+            loc_id, callee_id, caller_id = m.groups()
+            # Empty strings map to main loc key ""
+            callsite_defs.append((loc_id, callee_id or "", caller_id or "", i))
+    # Resolve callsite definitions
+    # A callsite inherits the location from its callee (the code being called)
+    # and stores a reference to its caller (the code doing the calling)
+    for loc_id, callee_id, caller_id, def_line in callsite_defs:
+        if loc_id not in locations:  # Avoid overwriting existing definitions
+            if callee_id in locations:
+                # Inherit location info from callee
+                callee_info = locations[callee_id]
+                locations[loc_id] = {
+                    "file": callee_info["file"],
+                    "line": callee_info["line"],
+                    "column": callee_info["column"],
+                    "def_line": def_line,
+                    "is_callsite": True,
+                    "callsite_callee": callee_id,
+                    "callsite_caller": caller_id,
+                }
+            else:
+                logger.warning(
+                    f"Callsite #loc{loc_id} references undefined callee #loc{callee_id}"
+                )
+                # Note: We don't add this callsite to locations since callee is missing
+    # Verify caller references (warning only, don't block)
+    for loc_id, _callee_id, caller_id, _def_line in callsite_defs:
+        if loc_id in locations and caller_id and caller_id not in locations:
+            logger.warning(
+                f"Callsite #loc{loc_id} references undefined caller #loc{caller_id}"
+            )
+    # Attach definition line and alias metadata
+    for k, v in def_line_map.items():
+        if k in locations:
+            locations[k]["def_line"] = v
+    for alias_id, target_id in alias_map.items():
+        if alias_id in locations:
+            locations[alias_id]["alias_of"] = target_id
+            if alias_id in alias_name_map:
+                locations[alias_id]["alias_name"] = alias_name_map[alias_id]
+    # Attach definition line metadata
+    for k, v in def_line_map.items():
+        if k in locations:
+            locations[k]["def_line"] = v
+    if main_loc_line and "" in locations:
+        locations[""]["def_line"] = main_loc_line
+    return locations
+def extract_code_locations(ir_content: str) -> Dict[int, str]:
+    """
+    Extracts code location mappings from the given IR content.
+    This function scans through the provided IR content line by line and identifies
+    lines that contain location references. It uses regular expressions to match
+    both the #loc directives and direct file references. The function returns a
+    dictionary mapping line numbers to their corresponding location identifiers.
+    Limitations:
+        For the first function arguments line, it may use some #loc(file:line:col), DIRECT_FILE_PATTERN, we only use the first location reference.
+    Args:
+        ir_content (str): The content of the IR file as a string.
+    Returns:
+        Dict[int, str]: A dictionary mapping line numbers to location identifiers,
+        which can be either a #loc identifier or a direct file reference.
+    """
+    line_to_loc = {}
+    for i, line in enumerate(ir_content.split("\n"), start=1):
+        if m := CODE_LOC_PATTERN.search(line):
+            line_to_loc[i] = m.group(1) or "0"
+        elif m := DIRECT_FILE_PATTERN.search(line):
+            file_path, ln, col = m.groups()
+            line_to_loc[i] = f"direct:{file_path}:{ln}:{col}"
+    return line_to_loc
+def extract_ptx_amdgcn_mappings(
+    content: str, other_mappings: List[Any] = None, ir_type: str = "ptx"
+) -> Dict[str, Dict[str, Any]]:
+    """
+    Extract mappings from PTX code where `.loc` directives provide source file and line info.
+    This function only processes code between the function begin and end markers (e.g., "// -- Begin function" and "// -- End function"). The PTX source code line mapping is quite different from that of other IRs. It segments the PTX code using the .loc directive, where each .loc directive provides information for mapping to a source code line.
+    This function:
+    1. Identifies the function boundary in PTX code
+    2. Only processes code within the function boundary
+    3. Maps PTX lines with `.loc` directives to source files and line numbers
+    4. Associates subsequent code lines with the most recent `.loc` directive
+    Args:
+        ptx_content: The content of the PTX file
+    Returns:
+        Dictionary mapping PTX line numbers to source location information
+    """
+    mappings = {}
+    current_mapping = None
+    # Mark function scope
+    function_start_line = 0
+    function_end_line = 0
+    # filename: {file_path, ...}
+    referenced_files = defaultdict(set)
+    if other_mappings is None:
+        other_mappings = []
+    for other in other_mappings:
+        for _, info in other.items():
+            if "file" in info:
+                file_name = os.path.basename(info["file"])
+                referenced_files[file_name].add(info["file"])
+    def get_file_path(filename: str) -> str:
+        file_path = filename
+        if not os.path.isabs(filename):
+            logger.debug(
+                f"Filename '{filename}' does not contain a path. Attempting to resolve."
+            )
+            # Attempt to resolve the filename to a full path using referenced_files
+            if filename in referenced_files:
+                if len(referenced_files[filename]) > 1:
+                    logger.debug(
+                        f"Filename '{filename}' has multiple file paths. Using the first one."
+                    )
+                file_path = list(referenced_files[filename])[0]
+                logger.debug(f"Resolved filename '{filename}' to {file_path}")
+            else:
+                logger.debug(f"Filename '{filename}' not found in referenced files.")
+        return file_path
+    # Regular expressions to match function start and end markers
+    # @TODO: need to double check if the PTX content only contains one function
+    begin_func_pattern = re.compile(
+        r"(?:(?://|;)\s*(?:\.globl\s+\S+\s+)?|\.globl\s+\S+\s+;\s*)--\s*Begin function"
+    )
+    end_func_pattern = re.compile(r"(?://|;)\s*--\s*End function")
+    # First scan: find function boundaries
+    lines = content.split("\n")
+    for i, line in enumerate(lines, 1):
+        if begin_func_pattern.search(line) and function_start_line == 0:
+            function_start_line = i
+        elif end_func_pattern.search(line) and function_start_line > 0:
+            function_end_line = i
+            break
+    # If no function boundaries are found, return empty mapping
+    if function_start_line == 0 or function_end_line == 0:
+        logger.warning(
+            f"Could not identify {ir_type} function boundaries. No {ir_type} mappings generated."
+        )
+        return mappings
+    logger.debug(
+        f"Processing {ir_type} function from line {function_start_line} to {function_end_line}"
+    )
+    is_ptx = ir_type == "ptx"
+    is_amdgcn = ir_type == "amdgcn"
+    tmp_loc_pattern = PTX_LOC_PATTERN if is_ptx else AMDGCN_LOC_PATTERN
+    # Second scan: process code within function body
+    # pay attention to the line number, it starts from 0 but the function_start_line starts from 1
+    for i, line in enumerate(
+        lines[function_start_line:function_end_line], start=function_start_line + 1
+    ):
+        try:
+            # Check .loc directive line
+            match = tmp_loc_pattern.match(line)
+            if match:
+                if is_ptx:
+                    py_line, py_col, filename, _, _ = match.groups()
+                elif is_amdgcn:
+                    py_file_index, py_line, py_col, filename, _, _ = match.groups()
+                else:
+                    logger.error(f"Unknown IR type: {ir_type}")
+                    raise ValueError(f"Unknown IR type: {ir_type}")
+                file_path = get_file_path(filename)
+                # Create new mapping
+                current_mapping = {
+                    "file": file_path,
+                    "line": int(py_line),
+                    "column": int(py_col),
+                    f"{ir_type}_line": i,
+                }
+                # Store mapping
+                mappings[str(i)] = current_mapping
+            elif current_mapping:
+                # For lines without their own .loc after .loc directive, associate with the nearest .loc mapping
+                # Only process non-empty, non-comment meaningful code lines
+                line_content = line.strip()
+                if line_content and not (
+                    (is_ptx and line_content.startswith("//"))
+                    or (is_amdgcn and line_content.startswith(";"))
+                ):
+                    mappings[str(i)] = {
+                        "file": current_mapping["file"],
+                        "line": current_mapping["line"],
+                        "column": current_mapping["column"],
+                        f"{ir_type}_line": i,
+                    }
+        except Exception as e:
+            logger.error(f"Error processing line {i}: {e}")
+            logger.error(f"Line content: {line}")
+            raise e
+    return mappings

tritonparse/mapper.py ADDED Viewed

@@ -0,0 +1,102 @@
+#  Copyright (c) Meta Platforms, Inc. and affiliates.
+import logging
+from collections import defaultdict
+from typing import Any, Dict, List, Tuple
+# Module logger; it uses the same "SourceMapping" logger name as ir_parser.py.
+logger = logging.getLogger("SourceMapping")
+def create_python_mapping(
+    ir_maps: List[Tuple[str, Dict[str, Dict[str, Any]]]],
+) -> Dict[int, Dict[str, List[int]]]:
+    """
+    Create a mapping from Python source code to IR mappings. We assume there is only one Python source code for each triton kernel.
+    Args:
+        ir_maps: A list of tuples containing the IR type and the IR mappings.
+    Returns:
+        A dictionary mapping Python source code line numbers to their corresponding IR mappings.
+    """
+    py_map = defaultdict(lambda: defaultdict(list))
+    for ir_type, ir_map in ir_maps:
+        for line_number, info in ir_map.items():
+            py_line_number: int = info["line"]
+            py_map[py_line_number][f"{ir_type}_lines"].append(line_number)
+    return {k: dict(v) for k, v in py_map.items()}
+def create_ir_mapping(
+    source_map: Dict[str, Dict[str, Any]], target_map: Dict[str, Dict[str, Any]]
+) -> Dict[str, List[int]]:
+    """
+    Create a mapping from source IR lines to target IR lines.
+    This function takes two mappings: one for source IR and one for target IR, and creates a new mapping
+    that associates lines in the source IR with corresponding lines in the target IR based on their file,
+    line, and column information.
+    Args:
+        source_map (Dict[str, Dict[str, Any]]): A dictionary mapping source IR line numbers to their source file,
+            line, and column information.
+        target_map (Dict[str, Dict[str, Any]]): A dictionary mapping target IR line numbers to their source file,
+            line, and column information.
+    Returns:
+        Dict[str, List[int]]: A dictionary mapping source IR line numbers to lists of corresponding target IR line numbers.
+    """
+    source_to_target = defaultdict(list)
+    # Build a mapping from source file locations to target lines
+    for tgt_line, tgt_info in target_map.items():
+        if "file" in tgt_info and "line" in tgt_info:
+            key = f"{tgt_info['file']}:{tgt_info['line']}:{tgt_info.get('column', 0)}"
+            source_to_target[key].append(int(tgt_line))
+    # Map source lines to target lines
+    mapping = {}
+    for src_line, src_info in source_map.items():
+        if "file" in src_info and "line" in src_info:
+            key = f"{src_info['file']}:{src_info['line']}:{src_info.get('column', 0)}"
+            if key in source_to_target:
+                mapping[src_line] = sorted(source_to_target[key])
+    return mapping
+def create_bidirectional_mapping(
+    source_map: Dict[str, Dict[str, Any]],
+    target_map: Dict[str, Dict[str, Any]],
+    source_type: str,
+    target_type: str,
+) -> None:
+    """
+    Create bidirectional mappings between two IR types and update their mapping dictionaries.
+    This function creates mappings from source IR to target IR and vice versa, then
+    updates both mapping dictionaries with the line references.
+    Args:
+        source_map: Dictionary mapping source IR line numbers to source locations
+        target_map: Dictionary mapping target IR line numbers to source locations
+        source_type: String identifier for the source IR type (e.g., 'ttir', 'ttgir', 'ptx')
+        target_type: String identifier for the target IR type (e.g., 'ttir', 'ttgir', 'ptx')
+    """
+    # Create forward mapping (source to target)
+    source_to_target = create_ir_mapping(source_map, target_map)
+    # Add target line references to source mappings
+    for source_line, target_lines in source_to_target.items():
+        if source_line in source_map and target_lines:
+            source_map[source_line][f"{target_type}_lines"] = target_lines
+    # Create reverse mapping (target to source)
+    target_to_source = create_ir_mapping(target_map, source_map)
+    # Add source line references to target mappings
+    for target_line, source_lines in target_to_source.items():
+        if target_line in target_map:
+            target_map[target_line][f"{source_type}_lines"] = source_lines
+    logger.debug(f"Created {source_type} to {target_type} mappings (and reverse)")

tritonparse/reproducer/__init__.py ADDED Viewed

File without changes