tritonparse-0.3.2.dev20251210071601-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tritonparse has been flagged as possibly problematic.

Files changed (62)
  1. tritonparse/__init__.py +0 -0
  2. tritonparse/__main__.py +7 -0
  3. tritonparse/cli.py +110 -0
  4. tritonparse/common.py +409 -0
  5. tritonparse/context_manager.py +64 -0
  6. tritonparse/event_diff.py +122 -0
  7. tritonparse/extract_source_mappings.py +49 -0
  8. tritonparse/info/__init__.py +30 -0
  9. tritonparse/info/cli.py +121 -0
  10. tritonparse/info/kernel_query.py +209 -0
  11. tritonparse/info/parse_helper.py +70 -0
  12. tritonparse/ir_analysis.py +427 -0
  13. tritonparse/ir_parser.py +365 -0
  14. tritonparse/mapper.py +102 -0
  15. tritonparse/reproducer/__init__.py +0 -0
  16. tritonparse/reproducer/ast_analyzer.py +636 -0
  17. tritonparse/reproducer/cli.py +72 -0
  18. tritonparse/reproducer/consolidated_result.py +52 -0
  19. tritonparse/reproducer/function_extractor.py +228 -0
  20. tritonparse/reproducer/import_info.py +25 -0
  21. tritonparse/reproducer/import_parser.py +178 -0
  22. tritonparse/reproducer/import_resolver.py +151 -0
  23. tritonparse/reproducer/ingestion/ndjson.py +237 -0
  24. tritonparse/reproducer/multi_file_analyzer.py +824 -0
  25. tritonparse/reproducer/orchestrator.py +110 -0
  26. tritonparse/reproducer/placeholder_replacer.py +335 -0
  27. tritonparse/reproducer/templates/__init__.py +0 -0
  28. tritonparse/reproducer/templates/example.py +38 -0
  29. tritonparse/reproducer/templates/loader.py +59 -0
  30. tritonparse/reproducer/templates/tritonbench.py +106 -0
  31. tritonparse/reproducer/templates/utils.py +48 -0
  32. tritonparse/reproducer/tests/__init__.py +0 -0
  33. tritonparse/reproducer/tests/artifacts/__init__.py +5 -0
  34. tritonparse/reproducer/tests/artifacts/triton_fused_kernel.py +65 -0
  35. tritonparse/reproducer/tests/artifacts/triton_preprocess.py +16 -0
  36. tritonparse/reproducer/tests/artifacts/triton_utils.py +14 -0
  37. tritonparse/reproducer/tests/test_import_parser.py +164 -0
  38. tritonparse/reproducer/tests/test_import_resolver.py +88 -0
  39. tritonparse/reproducer/tests/test_multi_file_analyzer.py +118 -0
  40. tritonparse/reproducer/types.py +20 -0
  41. tritonparse/reproducer/utils.py +580 -0
  42. tritonparse/shared_vars.py +12 -0
  43. tritonparse/source_type.py +56 -0
  44. tritonparse/sourcemap_utils.py +96 -0
  45. tritonparse/structured_logging.py +1634 -0
  46. tritonparse/tools/__init__.py +0 -0
  47. tritonparse/tools/decompress_bin_ndjson.py +120 -0
  48. tritonparse/tools/disasm.py +81 -0
  49. tritonparse/tools/extract_irs.py +244 -0
  50. tritonparse/tools/format_fix.py +151 -0
  51. tritonparse/tools/load_tensor.py +76 -0
  52. tritonparse/tools/prettify_ndjson.py +334 -0
  53. tritonparse/tools/readme.md +37 -0
  54. tritonparse/tp_logger.py +9 -0
  55. tritonparse/trace_processor.py +367 -0
  56. tritonparse/utils.py +155 -0
  57. tritonparse-0.3.2.dev20251210071601.dist-info/METADATA +195 -0
  58. tritonparse-0.3.2.dev20251210071601.dist-info/RECORD +62 -0
  59. tritonparse-0.3.2.dev20251210071601.dist-info/WHEEL +5 -0
  60. tritonparse-0.3.2.dev20251210071601.dist-info/entry_points.txt +2 -0
  61. tritonparse-0.3.2.dev20251210071601.dist-info/licenses/LICENSE +29 -0
  62. tritonparse-0.3.2.dev20251210071601.dist-info/top_level.txt +1 -0
tritonparse/event_diff.py
@@ -0,0 +1,122 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ import json
+ from collections import defaultdict
+ from typing import Any, Dict, List, Tuple
+
+ from .sourcemap_utils import _flatten_dict, _to_ranges, _unflatten_dict
+
+ # Fields that are expected to vary but are not useful to list out in the diff.
+ SUMMARY_FIELDS = ["pid", "timestamp", "stream", "function", "data_ptr"]
+
+
+ def _generate_launch_diff(
+     launches: List[Tuple[Dict[str, Any], int]],
+ ) -> Tuple[Dict[str, Any], Dict[str, Any], List[Dict[str, int]]]:
+     """
+     Compares a list of launch events and returns sames, diffs, and an index map.
+     """
+     if not launches:
+         return {}, {}, []
+
+     launch_events = [launch[0] for launch in launches]
+     launch_index_map = [launch[1] for launch in launches]
+
+     if len(launch_events) == 1:
+         return (
+             _unflatten_dict(_flatten_dict(launch_events[0])),
+             {},
+             _to_ranges(launch_index_map),
+         )
+
+     # Group values by key
+     data_by_key = defaultdict(lambda: defaultdict(list))
+     for i, launch in enumerate(launch_events):
+         launch_flat = _flatten_dict(launch)
+         for key, value in launch_flat.items():
+             # JSON doesn't support all Python types directly, so the serialized string is a safer grouping key
+             value_str = json.dumps(value, sort_keys=True)
+             data_by_key[key][value_str].append(i)
+
+     sames_flat = {}
+     diffs_flat = {}
+
+     for key, value_groups in data_by_key.items():
+         if len(value_groups) == 1:
+             # This key has the same value across all launches
+             value_str = list(value_groups.keys())[0]
+             sames_flat[key] = json.loads(value_str)
+         else:
+             # This key has different values
+             is_summary = any(summary_key in key for summary_key in SUMMARY_FIELDS)
+             if is_summary:
+                 diffs_flat[key] = {
+                     "diff_type": "summary",
+                     "summary_text": f"Varies across {len(value_groups)} unique values",
+                 }
+             else:
+                 values_dist = []
+                 for value_str, indices in value_groups.items():
+                     values_dist.append(
+                         {
+                             "value": json.loads(value_str),
+                             "count": len(indices),
+                             "launches": _to_ranges(indices),
+                         }
+                     )
+                 # Sort by first occurrence
+                 values_dist.sort(key=lambda x: x["launches"][0]["start"])
+                 diffs_flat[key] = {
+                     "diff_type": "distribution",
+                     "values": values_dist,
+                 }
+
+     # Unflatten the results
+     sames_unflattened = _unflatten_dict(sames_flat)
+     diffs_unflattened = _unflatten_dict(diffs_flat)
+
+     # Special handling for extracted_args to create argument_diff structures
+     if "extracted_args" in sames_unflattened or "extracted_args" in diffs_unflattened:
+         sames_args = sames_unflattened.pop("extracted_args", {})
+         diffs_args_flat = diffs_unflattened.pop("extracted_args", {})
+
+         all_arg_names = set(sames_args.keys()) | set(diffs_args_flat.keys())
+
+         final_arg_diffs = {}
+
+         for arg_name in all_arg_names:
+             if arg_name in diffs_args_flat:
+                 # This argument has at least one differing sub-field.
+                 arg_sames = {}
+                 arg_diffs_internal = {}
+
+                 # Collect all sub-fields for this argument from the original data
+                 all_sub_fields = set()
+                 for launch in launch_events:
+                     arg_data = launch.get("extracted_args", {}).get(arg_name, {})
+                     all_sub_fields.update(arg_data.keys())
+
+                 for sub_field in all_sub_fields:
+                     flat_key = f"extracted_args.{arg_name}.{sub_field}"
+                     if flat_key in diffs_flat:
+                         arg_diffs_internal[sub_field] = diffs_flat[flat_key]
+                     elif flat_key in sames_flat:
+                         arg_sames[sub_field] = sames_flat[flat_key]
+
+                 if arg_sames or arg_diffs_internal:
+                     final_arg_diffs[arg_name] = {
+                         "diff_type": "argument_diff",
+                         "sames": arg_sames,
+                         "diffs": arg_diffs_internal,
+                     }
+             elif arg_name in sames_args:
+                 # This argument is entirely the same across all launches.
+                 # We move it back to the main sames dict for consistency.
+                 if "extracted_args" not in sames_unflattened:
+                     sames_unflattened["extracted_args"] = {}
+                 sames_unflattened["extracted_args"][arg_name] = sames_args[arg_name]
+
+         if final_arg_diffs:
+             diffs_unflattened["extracted_args"] = final_arg_diffs
+
+     return sames_unflattened, diffs_unflattened, _to_ranges(launch_index_map)
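To make the sames/diffs split concrete, here is a minimal, self-contained sketch of the grouping step above. The local flatten helper is a hypothetical stand-in for the package's private _flatten_dict (and the sketch skips _to_ranges entirely), so it illustrates only the idea, not the exact output format:

import json
from collections import defaultdict

def flatten(d, prefix=""):
    # Flatten nested dicts into dotted keys: {"a": {"b": 1}} -> {"a.b": 1}.
    out = {}
    for k, v in d.items():
        key = f"{prefix}.{k}" if prefix else k
        if isinstance(v, dict):
            out.update(flatten(v, key))
        else:
            out[key] = v
    return out

# Two hypothetical launch events that agree on everything except "stream".
launches = [
    {"grid": [128, 1, 1], "stream": 7, "extracted_args": {"x": {"dtype": "fp16"}}},
    {"grid": [128, 1, 1], "stream": 9, "extracted_args": {"x": {"dtype": "fp16"}}},
]

# Same grouping idea as _generate_launch_diff: bucket launch indices by
# the JSON-serialized value of each flattened key.
data_by_key = defaultdict(lambda: defaultdict(list))
for i, launch in enumerate(launches):
    for key, value in flatten(launch).items():
        data_by_key[key][json.dumps(value, sort_keys=True)].append(i)

for key, groups in sorted(data_by_key.items()):
    status = "same" if len(groups) == 1 else f"differs across {len(groups)} values"
    print(f"{key}: {status}")
# extracted_args.x.dtype: same
# grid: same
# stream: differs across 2 values

In the real function, "stream" would be collapsed to a "summary" entry rather than a full value distribution, because it appears in SUMMARY_FIELDS.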
tritonparse/extract_source_mappings.py
@@ -0,0 +1,49 @@
+ #!/usr/bin/env python3
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ Extract source code mappings from Triton trace files and update the original JSON.
+ This script reads a JSON trace file containing Triton IR (TTIR, TTGIR) and PTX (AMDGCN on AMD GPUs),
+ and extracts bidirectional mappings between:
+ - Python ↔ TTIR
+ - Python ↔ TTGIR
+ - Python ↔ PTX (AMDGCN)
+ - TTIR ↔ TTGIR
+ - TTIR ↔ PTX (AMDGCN)
+ - TTGIR ↔ PTX (AMDGCN)
+ """
+
+ import argparse
+ import logging
+
+ from .trace_processor import parse_single_file
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("SourceMapping")
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description="Extract source code mappings from Triton trace files."
+     )
+     parser.add_argument("-i", "--input", help="Path to the Triton trace NDJSON file")
+     parser.add_argument(
+         "--output-dir",
+         default=None,
+         help="Directory to save the output files. If not specified, the input file's directory will be used.",
+     )
+     parser.add_argument(
+         "-o",
+         "--output",
+         default=None,
+         help="Output NDJSON path. If it is None, the default output file name will be set to {input}_mapped.ndjson in the parse function.",
+     )
+     return parser.parse_args()
+
+
+ if __name__ == "__main__":
+     args = parse_args()
+     if args.input:
+         parse_single_file(args.input, args.output_dir)
+     else:
+         logger.error("No input file specified.")
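The same parsing can also be driven programmatically, mirroring what the __main__ block above does; "trace.ndjson" and "parsed_out" are hypothetical example paths:

# Roughly equivalent to: python -m tritonparse.extract_source_mappings -i trace.ndjson
# (assuming the package is installed); the paths below are hypothetical examples.
from tritonparse.trace_processor import parse_single_file

parse_single_file("trace.ndjson", "parsed_out")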
tritonparse/info/__init__.py
@@ -0,0 +1,30 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ Info module for querying kernel information from NDJSON trace files.
+
+ This module provides core query functions for kernel information:
+ - Listing all kernels with their launch counts
+ - Finding launch events by kernel name and launch ID
+ - Querying launch information for specific kernels
+ """
+
+ from tritonparse.info.kernel_query import (
+     find_launch_index_by_kernel,
+     find_similar_kernels,
+     KernelSummary,
+     LaunchInfo,
+     list_kernels,
+     list_kernels_fast,
+     list_launches_for_kernel,
+ )
+
+ __all__ = [
+     "KernelSummary",
+     "LaunchInfo",
+     "list_kernels",
+     "list_kernels_fast",
+     "list_launches_for_kernel",
+     "find_launch_index_by_kernel",
+     "find_similar_kernels",
+ ]
tritonparse/info/cli.py
@@ -0,0 +1,121 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ CLI implementation for the info subcommand.
+
+ This module provides the command-line interface for querying kernel information
+ from NDJSON trace files.
+ """
+
+ import argparse
+ import tempfile
+ from typing import Optional
+
+ from tritonparse.info.kernel_query import (
+     find_similar_kernels,
+     list_kernels_fast,
+     list_launches_for_kernel,
+ )
+ from tritonparse.info.parse_helper import parse_and_compress_raw_log
+ from tritonparse.tools.prettify_ndjson import load_ndjson
+
+
+ def _add_info_args(parser: argparse.ArgumentParser) -> None:
+     """Add arguments for the info subcommand."""
+     parser.add_argument(
+         "input",
+         help="Path to ndjson/ndjson.gz/.bin.ndjson file",
+     )
+     parser.add_argument(
+         "--kernel",
+         type=str,
+         default=None,
+         help="Kernel name to list launches for",
+     )
+
+
+ def info_command(input_path: str, kernel_name: Optional[str] = None) -> None:
+     """
+     Main function for the info command.
+
+     Args:
+         input_path: Path to ndjson file
+         kernel_name: Optional kernel name to list launches for
+     """
+     # 1. Load and detect type
+     events = load_ndjson(input_path)
+     has_launch_diff = any(e.get("event_type") == "launch_diff" for e in events)
+
+     # 2. If no launch_diff, auto-parse
+     if not has_launch_diff:
+         print(
+             f"Input file '{input_path}' appears to be a raw log (no launch_diff events)."
+         )
+         print("Parsing automatically to generate launch_diff events...")
+
+         temp_dir = tempfile.mkdtemp(prefix="tritonparse_info_")
+
+         try:
+             # Parse and compress (reuses the parse module's functions)
+             parsed_file = parse_and_compress_raw_log(
+                 input_path,
+                 output_dir=temp_dir,
+                 split_inductor_compilations=False,
+                 verbose=False,
+             )
+
+             # Load compressed file (load_ndjson supports .ndjson.gz)
+             events = load_ndjson(parsed_file)
+
+             print(f"✓ Parsed and compressed file: {parsed_file}")
+             print(f" (Temporary directory: {temp_dir})")
+         except Exception as e:
+             raise RuntimeError(f"Failed to parse input file '{input_path}': {e}") from e
+     else:
+         print(f"Using parsed trace file: {input_path}")
+
+     # 3. Process query
+     if kernel_name:
+         # List launches for the specified kernel
+         try:
+             launches = list_launches_for_kernel(events, kernel_name)
+             print(f"\nLaunches for '{kernel_name}':")
+             print("-" * 60)
+             for launch in launches:
+                 grid_str = str(launch.grid) if launch.grid else "N/A"
+                 print(
+                     f" id={launch.launch_id:3d} line {launch.line_index:5d} grid={grid_str}"
+                 )
+         except ValueError as e:
+             error_msg = str(e)
+             print(f"\nError: {error_msg}")
+             # Try to suggest similar kernels
+             try:
+                 similar = find_similar_kernels(events, kernel_name, n=3)
+                 if similar:
+                     print("\nDid you mean one of these?")
+                     all_kernels = list_kernels_fast(
+                         events
+                     )  # Use fast path for consistency
+                     kernel_dict = {k.name: k for k in all_kernels}
+                     for name in similar:
+                         count = kernel_dict[name].total_launches
+                         print(f" - {name} ({count} launches)")
+                     print("\nUse 'tritonparseoss info <file>' to list all kernels.")
+             except Exception:
+                 pass  # Ignore errors in suggestion
+             raise
+     else:
+         # List all kernels
+         kernels = list_kernels_fast(events)
+         print(f"\nKernels in {input_path}:")
+         print("-" * 60)
+         for kernel in kernels:
+             if kernel.total_launches > 0:
+                 max_id = kernel.total_launches - 1
+                 print(
+                     f" {kernel.name:30s} {kernel.total_launches:3d} launches "
+                     f"(id: 0-{max_id})"
+                 )
+             else:
+                 print(f" {kernel.name:30s} {kernel.total_launches:3d} launches")
tritonparse/info/kernel_query.py
@@ -0,0 +1,209 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ Core query functions for kernel information from NDJSON trace files.
+
+ This module provides functions to query kernel launch information from parsed
+ event lists. It supports both raw log files and parsed ndjson files (with launch_diff events).
+ """
+
+ import difflib
+ from collections import defaultdict
+ from dataclasses import dataclass
+ from typing import Any, Dict, List
+
+
+ @dataclass
+ class KernelSummary:
+     """Summary information about a kernel."""
+
+     name: str
+     hash: str
+     total_launches: int
+
+
+ @dataclass
+ class LaunchInfo:
+     """Information about a specific kernel launch."""
+
+     launch_id: int  # 0-based
+     line_index: int  # 0-based (index in events list)
+     grid: List[int]
+
+
+ def list_kernels(events: List[Dict[str, Any]]) -> List[KernelSummary]:
+     """
+     List all kernels with their launch counts.
+
+     Args:
+         events: List of parsed event dictionaries from NDJSON file
+
+     Returns:
+         List of KernelSummary objects, sorted by kernel name
+     """
+     # Count launches per kernel
+     kernel_counts: Dict[str, Dict[str, Any]] = defaultdict(
+         lambda: {"hash": "", "count": 0}
+     )
+
+     for event in events:
+         if event.get("event_type") != "launch":
+             continue
+
+         comp_meta = event.get("compilation_metadata", {})
+         kernel_name = comp_meta.get("name")
+         kernel_hash = comp_meta.get("hash", "")
+
+         if kernel_name:
+             kernel_counts[kernel_name]["hash"] = kernel_hash
+             kernel_counts[kernel_name]["count"] += 1
+
+     # Convert to KernelSummary list
+     summaries = [
+         KernelSummary(name=name, hash=info["hash"], total_launches=info["count"])
+         for name, info in kernel_counts.items()
+     ]
+
+     # Sort by kernel name for consistent output
+     summaries.sort(key=lambda x: x.name)
+
+     return summaries
+
+
+ def find_launch_index_by_kernel(
+     events: List[Dict[str, Any]], kernel_name: str, launch_id: int
+ ) -> int:
+     """
+     Find the 0-based line index for a kernel's N-th launch.
+
+     Args:
+         events: List of parsed event dictionaries
+         kernel_name: Exact kernel name to match (case-sensitive)
+         launch_id: 0-based launch index for the kernel
+
+     Returns:
+         0-based line index (index in events list)
+
+     Raises:
+         ValueError: If kernel not found or launch_id out of range
+     """
+     count = 0
+     for i, event in enumerate(events):
+         if event.get("event_type") != "launch":
+             continue
+
+         comp_meta = event.get("compilation_metadata", {})
+         name = comp_meta.get("name")
+         if name == kernel_name:
+             if count == launch_id:
+                 return i
+             count += 1
+
+     if count == 0:
+         raise ValueError(f"Kernel '{kernel_name}' not found")
+     else:
+         raise ValueError(
+             f"Kernel '{kernel_name}' has only {count} launches, "
+             f"but --launch-id {launch_id} was requested. Valid range: 0 to {count - 1}"
+         )
+
+
+ def list_launches_for_kernel(
+     events: List[Dict[str, Any]], kernel_name: str
+ ) -> List[LaunchInfo]:
+     """
+     List all launches for a specific kernel.
+
+     Args:
+         events: List of parsed event dictionaries
+         kernel_name: Exact kernel name to match (case-sensitive)
+
+     Returns:
+         List of LaunchInfo objects for the kernel, sorted by launch_id
+
+     Raises:
+         ValueError: If kernel not found
+     """
+     launches = []
+     launch_id = 0
+
+     for i, event in enumerate(events):
+         if event.get("event_type") != "launch":
+             continue
+
+         comp_meta = event.get("compilation_metadata", {})
+         name = comp_meta.get("name")
+         if name == kernel_name:
+             # Extract grid information from launch event
+             grid = event.get("grid", [])
+             launches.append(LaunchInfo(launch_id=launch_id, line_index=i, grid=grid))
+             launch_id += 1
+
+     if not launches:
+         raise ValueError(f"Kernel '{kernel_name}' not found")
+
+     return launches
+
+
+ def find_similar_kernels(
+     events: List[Dict[str, Any]], kernel_name: str, n: int = 3
+ ) -> List[str]:
+     """
+     Find similar kernel names using fuzzy matching.
+
+     Args:
+         events: List of parsed event dictionaries
+         kernel_name: Kernel name to find similar matches for
+         n: Maximum number of matches to return
+
+     Returns:
+         List of similar kernel names (may be empty if no matches found)
+     """
+     all_kernels = list_kernels(events)
+     all_names = [k.name for k in all_kernels]
+     return difflib.get_close_matches(kernel_name, all_names, n=n, cutoff=0.6)
+
+
+ def list_kernels_fast(events: List[Dict[str, Any]]) -> List[KernelSummary]:
+     """
+     Fast kernel listing using launch_diff events when available.
+
+     If launch_diff events are present, uses them for fast listing.
+     Otherwise, falls back to list_kernels().
+
+     Args:
+         events: List of parsed event dictionaries
+
+     Returns:
+         List of KernelSummary objects, sorted by kernel name
+     """
+     # Check if launch_diff events are available
+     launch_diff_events = [e for e in events if e.get("event_type") == "launch_diff"]
+
+     if launch_diff_events:
+         # Use launch_diff events for fast listing
+         # Merge kernels with the same name (sum up launches)
+         kernel_dict: Dict[str, KernelSummary] = {}
+         for event in launch_diff_events:
+             name = event.get("name", "")
+             if not name:
+                 continue
+             hash_val = event.get("hash", "")
+             launches = event.get("total_launches", 0)
+
+             if name in kernel_dict:
+                 # Merge: sum up launches, keep first hash
+                 kernel_dict[name].total_launches += launches
+             else:
+                 kernel_dict[name] = KernelSummary(
+                     name=name,
+                     hash=hash_val,
+                     total_launches=launches,
+                 )
+
+         summaries = list(kernel_dict.values())
+         summaries.sort(key=lambda x: x.name)
+         return summaries
+     else:
+         # Fall back to full traversal
+         return list_kernels(events)
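Because these functions take plain event dictionaries, they can be exercised without a trace file. A minimal sketch with synthetic events that carry only the fields the queries read; the kernel name and hash are hypothetical, and real trace events have many more fields:

from tritonparse.info.kernel_query import find_launch_index_by_kernel, list_kernels

# Synthetic events shaped like the fields read above (hypothetical values).
events = [
    {"event_type": "compilation"},
    {
        "event_type": "launch",
        "compilation_metadata": {"name": "add_kernel", "hash": "abc123"},
    },
    {
        "event_type": "launch",
        "compilation_metadata": {"name": "add_kernel", "hash": "abc123"},
    },
]

for k in list_kernels(events):
    print(k.name, k.total_launches)  # add_kernel 2

# The second launch (0-based id 1) of add_kernel sits at events index 2.
print(find_launch_index_by_kernel(events, "add_kernel", launch_id=1))  # 2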
tritonparse/info/parse_helper.py
@@ -0,0 +1,70 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ Helper functions for parsing raw log files in the info module.
+
+ This module provides utilities to parse and compress raw log files,
+ reusing functionality from the parse module.
+ """
+
+ from pathlib import Path
+
+ from tritonparse.common import gzip_single_file
+ from tritonparse.trace_processor import parse_single_file
+
+
+ def parse_and_compress_raw_log(
+     input_path: str,
+     output_dir: str,
+     split_inductor_compilations: bool = False,
+     verbose: bool = False,
+ ) -> Path:
+     """
+     Parse a raw log file, compress it, and return the path to the compressed parsed file.
+
+     This function reuses the parse module's functionality:
+     - parse_single_file: Parse the file
+     - gzip_single_file: Compress the parsed file
+
+     Args:
+         input_path: Path to raw log file
+         output_dir: Directory to save parsed file
+         split_inductor_compilations: Whether to split by inductor compilations
+         verbose: Whether to print verbose information
+
+     Returns:
+         Path to the generated compressed parsed file (.ndjson.gz)
+
+     Raises:
+         RuntimeError: If parsing fails or parsed file not found
+     """
+     # 1. Parse the file (generates uncompressed .ndjson)
+     parse_single_file(
+         input_path,
+         output_dir=output_dir,
+         split_inductor_compilations=split_inductor_compilations,
+     )
+
+     # 2. Calculate the expected output filename
+     input_path_obj = Path(input_path)
+     file_name = input_path_obj.name
+
+     if input_path.endswith(".bin.ndjson"):
+         file_name_without_ext = file_name[:-11]  # Remove ".bin.ndjson"
+     else:
+         file_name_without_ext = input_path_obj.stem  # Remove the last extension
+         # If there's still a .ndjson extension, remove it (e.g. for .ndjson.gz inputs)
+         if file_name_without_ext.endswith(".ndjson"):
+             file_name_without_ext = file_name_without_ext[:-7]
+
+     uncompressed_file = Path(output_dir) / f"{file_name_without_ext}_mapped.ndjson"
+
+     if not uncompressed_file.exists():
+         raise RuntimeError(
+             f"Failed to generate parsed file. Expected: {uncompressed_file}"
+         )
+
+     # 3. Compress the file (reusing the parse module's function)
+     compressed_file = gzip_single_file(str(uncompressed_file), verbose=verbose)
+
+     return Path(compressed_file)  # Returns the .ndjson.gz path
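The filename derivation above is the subtle part for multi-suffix inputs; this standalone sketch reproduces just that logic for a few hypothetical input names:

from pathlib import Path

def mapped_name(input_path: str) -> str:
    # Mirrors the stem computation in parse_and_compress_raw_log above.
    name = Path(input_path).name
    if input_path.endswith(".bin.ndjson"):
        stem = name[:-11]  # strip ".bin.ndjson"
    else:
        stem = Path(input_path).stem  # strips only the last suffix
        if stem.endswith(".ndjson"):
            stem = stem[:-7]  # strip a remaining ".ndjson" (e.g. ".ndjson.gz" inputs)
    return f"{stem}_mapped.ndjson"

print(mapped_name("trace.bin.ndjson"))  # trace_mapped.ndjson
print(mapped_name("trace.ndjson.gz"))   # trace_mapped.ndjson
print(mapped_name("trace.ndjson"))      # trace_mapped.ndjson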