tritonparse 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tritonparse might be problematic.
- tritonparse/__init__.py +0 -0
- tritonparse/common.py +409 -0
- tritonparse/event_diff.py +120 -0
- tritonparse/extract_source_mappings.py +49 -0
- tritonparse/ir_parser.py +220 -0
- tritonparse/mapper.py +100 -0
- tritonparse/reproducer/__init__.py +21 -0
- tritonparse/reproducer/__main__.py +81 -0
- tritonparse/reproducer/cli.py +37 -0
- tritonparse/reproducer/config.py +15 -0
- tritonparse/reproducer/factory.py +16 -0
- tritonparse/reproducer/ingestion/__init__.py +6 -0
- tritonparse/reproducer/ingestion/ndjson.py +165 -0
- tritonparse/reproducer/orchestrator.py +65 -0
- tritonparse/reproducer/param_generator.py +142 -0
- tritonparse/reproducer/prompts/__init__.py +1 -0
- tritonparse/reproducer/prompts/loader.py +18 -0
- tritonparse/reproducer/providers/__init__.py +1 -0
- tritonparse/reproducer/providers/base.py +14 -0
- tritonparse/reproducer/providers/gemini.py +47 -0
- tritonparse/reproducer/runtime/__init__.py +1 -0
- tritonparse/reproducer/runtime/executor.py +13 -0
- tritonparse/reproducer/utils/io.py +6 -0
- tritonparse/shared_vars.py +9 -0
- tritonparse/source_type.py +56 -0
- tritonparse/sourcemap_utils.py +72 -0
- tritonparse/structured_logging.py +1046 -0
- tritonparse/tools/__init__.py +0 -0
- tritonparse/tools/decompress_bin_ndjson.py +118 -0
- tritonparse/tools/format_fix.py +149 -0
- tritonparse/tools/load_tensor.py +58 -0
- tritonparse/tools/prettify_ndjson.py +315 -0
- tritonparse/tp_logger.py +9 -0
- tritonparse/trace_processor.py +331 -0
- tritonparse/utils.py +156 -0
- tritonparse-0.1.1.dist-info/METADATA +10 -0
- tritonparse-0.1.1.dist-info/RECORD +40 -0
- tritonparse-0.1.1.dist-info/WHEEL +5 -0
- tritonparse-0.1.1.dist-info/licenses/LICENSE +29 -0
- tritonparse-0.1.1.dist-info/top_level.txt +1 -0
tritonparse/ir_parser.py
ADDED
@@ -0,0 +1,220 @@
import logging
import os
import re
from collections import defaultdict
from typing import Any, Dict, List

logger = logging.getLogger("SourceMapping")

# The definition of the #loc directive. These appear at the bottom of the IR files.
# Example: #loc2 = loc("/tmp/torchinductor_yhao/yp/abcdef.py":20:28)
LOC_PATTERN = re.compile(r'#loc(\d*) = loc\("([^"]+)":(\d+):(\d+)\)')

# The reference to the #loc directive. These appear at the end of lines in the IR files.
# Example: loc(#loc2)
CODE_LOC_PATTERN = re.compile(r".*loc\(#loc(\d*)\)\s*$")

# This pattern is used in the first function-arguments line.
DIRECT_FILE_PATTERN = re.compile(r'.*loc\("([^"]+)":(\d+):(\d+)\)')

# The definition of the PTX loc directive.
# Example: .loc 1 0 50 // abcdef.py:0:50
PTX_LOC_PATTERN = re.compile(
    r"^\s*\.loc\s+\d+\s+(\d+)\s+(\d+)\s+//\s*(.+?):(\d+):(\d+)"
)

# The definition of the AMDGCN loc directive.
# Example: .loc 1 32 30 ; abcd.py:32:30
#          .loc 1 32 46 is_stmt 0 ; abcd.py:32:46
AMDGCN_LOC_PATTERN = re.compile(
    r".*loc\s+(\d+)\s+(\d+)\s+(\d+)(?:\s+[^;]*)?;\s*(.+?):(\d+):(\d+)"
)


def extract_loc_definitions(ir_content: str) -> Dict[str, Dict[str, Any]]:
    """
    Extracts location definitions from the given IR content.

    This function searches for #loc directives in the provided IR content string.
    It identifies the main #loc directive, which is a special case located at the top
    of the IR files, and any subsequent #loc directives that define source file locations.

    Args:
        ir_content (str): The content of the IR file as a string.

    Returns:
        Dict[str, Dict[str, Any]]: A dictionary mapping location IDs to their corresponding
        file names, line numbers, and column numbers.
    """
    locations = {}
    # The first #loc directive is a special case. It is located at the top of the IR files.
    main_match = re.search(r'#loc = loc\("([^"]+)":(\d+):(\d+)\)', ir_content)
    if main_match:
        locations["1"] = {
            "file": main_match.group(1),
            "line": int(main_match.group(2)),
            "column": int(main_match.group(3)),
        }
    # #loc1 = loc(unknown) is another special case. We ignore it.
    for loc_id, filename, line, col in LOC_PATTERN.findall(ir_content):
        key = loc_id
        locations[key] = {"file": filename, "line": int(line), "column": int(col)}
    return locations


def extract_code_locations(ir_content: str) -> Dict[int, str]:
    """
    Extracts code location mappings from the given IR content.

    This function scans through the provided IR content line by line and identifies
    lines that contain location references. It uses regular expressions to match
    both the #loc directives and direct file references. The function returns a
    dictionary mapping line numbers to their corresponding location identifiers.

    Limitations:
        The first function-arguments line may contain several loc(file:line:col)
        references (DIRECT_FILE_PATTERN); only the first location reference is used.

    Args:
        ir_content (str): The content of the IR file as a string.

    Returns:
        Dict[int, str]: A dictionary mapping line numbers to location identifiers,
        which can be either a #loc identifier or a direct file reference.
    """
    line_to_loc = {}
    for i, line in enumerate(ir_content.split("\n"), start=1):
        if m := CODE_LOC_PATTERN.search(line):
            line_to_loc[i] = m.group(1) or "0"
        elif m := DIRECT_FILE_PATTERN.search(line):
            file_path, ln, col = m.groups()
            line_to_loc[i] = f"direct:{file_path}:{ln}:{col}"
    return line_to_loc


def extract_ptx_amdgcn_mappings(
    content: str, other_mappings: List[Any] = None, ir_type: str = "ptx"
) -> Dict[str, Dict[str, Any]]:
    """
    Extract mappings from PTX code where `.loc` directives provide source file and line info.

    This function only processes code between the function begin and end markers
    (e.g., "// -- Begin function" and "// -- End function"). The PTX source line
    mapping is quite different from that of the other IRs: the PTX code is segmented
    by `.loc` directives, and each `.loc` directive provides the information needed to
    map the lines that follow it to a source code line.

    This function:
    1. Identifies the function boundary in PTX code
    2. Only processes code within the function boundary
    3. Maps PTX lines with `.loc` directives to source files and line numbers
    4. Associates subsequent code lines with the most recent `.loc` directive

    Args:
        content: The content of the PTX/AMDGCN file.
        other_mappings: Mappings from other IRs; used to resolve bare filenames in
            `.loc` directives to the full paths they reference.
        ir_type: Either "ptx" or "amdgcn".

    Returns:
        Dictionary mapping PTX/AMDGCN line numbers to source location information.
    """
    mappings = {}
    current_mapping = None

    # Mark function scope
    function_start_line = 0
    function_end_line = 0
    # filename: {file_path, ...}
    referenced_files = defaultdict(set)
    if other_mappings is None:
        other_mappings = []
    for other in other_mappings:
        for _, info in other.items():
            if "file" in info:
                file_name = os.path.basename(info["file"])
                referenced_files[file_name].add(info["file"])

    def get_file_path(filename: str) -> str:
        file_path = filename
        if not os.path.isabs(filename):
            logger.debug(
                f"Filename '{filename}' does not contain a path. Attempting to resolve."
            )
            # Attempt to resolve the filename to a full path using referenced_files
            if filename in referenced_files:
                if len(referenced_files[filename]) > 1:
                    logger.debug(
                        f"Filename '{filename}' has multiple file paths. Using the first one."
                    )
                file_path = list(referenced_files[filename])[0]
                logger.debug(f"Resolved filename '{filename}' to {file_path}")
            else:
                logger.debug(f"Filename '{filename}' not found in referenced files.")
        return file_path

    # Regular expressions to match function start and end markers
    # @TODO: need to double check if the PTX content only contains one function
    begin_func_pattern = re.compile(
        r"(?:(?://|;)\s*(?:\.globl\s+\S+\s+)?|\.globl\s+\S+\s+;\s*)--\s*Begin function"
    )
    end_func_pattern = re.compile(r"(?://|;)\s*--\s*End function")

    # First scan: find function boundaries
    lines = content.split("\n")
    for i, line in enumerate(lines, 1):
        if begin_func_pattern.search(line) and function_start_line == 0:
            function_start_line = i
        elif end_func_pattern.search(line) and function_start_line > 0:
            function_end_line = i
            break

    # If no function boundaries are found, return empty mapping
    if function_start_line == 0 or function_end_line == 0:
        logger.warning(
            f"Could not identify {ir_type} function boundaries. No {ir_type} mappings generated."
        )
        return mappings

    logger.debug(
        f"Processing {ir_type} function from line {function_start_line} to {function_end_line}"
    )

    is_ptx = ir_type == "ptx"
    is_amdgcn = ir_type == "amdgcn"

    tmp_loc_pattern = PTX_LOC_PATTERN if is_ptx else AMDGCN_LOC_PATTERN
    # Second scan: process code within the function body.
    # Pay attention to the line numbering: the slice index is 0-based while
    # function_start_line is 1-based.
    for i, line in enumerate(
        lines[function_start_line:function_end_line], start=function_start_line + 1
    ):
        try:
            # Check .loc directive line
            match = tmp_loc_pattern.match(line)
            if match:
                if is_ptx:
                    py_line, py_col, filename, _, _ = match.groups()
                elif is_amdgcn:
                    py_file_index, py_line, py_col, filename, _, _ = match.groups()
                else:
                    logger.error(f"Unknown IR type: {ir_type}")
                    raise ValueError(f"Unknown IR type: {ir_type}")
                file_path = get_file_path(filename)
                # Create new mapping
                current_mapping = {
                    "file": file_path,
                    "line": int(py_line),
                    "column": int(py_col),
                    f"{ir_type}_line": i,
                }
                # Store mapping
                mappings[str(i)] = current_mapping
            elif current_mapping:
                # For lines without their own .loc after a .loc directive, associate
                # them with the nearest preceding .loc mapping.
                # Only process non-empty, non-comment meaningful code lines.
                line_content = line.strip()
                if line_content and not (
                    (is_ptx and line_content.startswith("//"))
                    or (is_amdgcn and line_content.startswith(";"))
                ):
                    mappings[str(i)] = {
                        "file": current_mapping["file"],
                        "line": current_mapping["line"],
                        "column": current_mapping["column"],
                        f"{ir_type}_line": i,
                    }
        except Exception as e:
            logger.error(f"Error processing line {i}: {e}")
            logger.error(f"Line content: {line}")
            raise e
    return mappings
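For illustration only (not part of the wheel): a minimal sketch of driving the two #loc helpers above against a hand-written TTIR-like snippet; the file path and line/column numbers in the snippet are made up.

from tritonparse.ir_parser import extract_code_locations, extract_loc_definitions

ttir = '''module {
  tt.func public @add_kernel(%arg0: !tt.ptr<f32> loc("/tmp/add_kernel.py":10:0)) {
    %0 = tt.get_program_id x : i32 loc(#loc2)
    tt.return loc(#loc3)
  } loc(#loc)
} loc(#loc)
#loc = loc("/tmp/add_kernel.py":10:0)
#loc2 = loc("/tmp/add_kernel.py":14:24)
#loc3 = loc("/tmp/add_kernel.py":20:4)
'''

# Illustrative expectations only:
defs = extract_loc_definitions(ttir)  # e.g. defs["2"] == {"file": "/tmp/add_kernel.py", "line": 14, "column": 24}
refs = extract_code_locations(ttir)   # e.g. refs[3] == "2", refs[4] == "3"
print(defs, refs)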
tritonparse/mapper.py
ADDED
@@ -0,0 +1,100 @@
import logging
from collections import defaultdict
from typing import Any, Dict, List, Tuple

logger = logging.getLogger("SourceMapping")


def create_python_mapping(
    ir_maps: List[Tuple[str, Dict[str, Dict[str, Any]]]],
) -> Dict[int, Dict[str, List[int]]]:
    """
    Create a mapping from Python source code to IR mappings. We assume there is
    only one Python source code for each triton kernel.

    Args:
        ir_maps: A list of tuples containing the IR type and the IR mappings.

    Returns:
        A dictionary mapping Python source code line numbers to their corresponding IR mappings.
    """
    py_map = defaultdict(lambda: defaultdict(list))
    for ir_type, ir_map in ir_maps:
        for line_number, info in ir_map.items():
            py_line_number: int = info["line"]
            py_map[py_line_number][f"{ir_type}_lines"].append(line_number)
    return {k: dict(v) for k, v in py_map.items()}


def create_ir_mapping(
    source_map: Dict[str, Dict[str, Any]], target_map: Dict[str, Dict[str, Any]]
) -> Dict[str, List[int]]:
    """
    Create a mapping from source IR lines to target IR lines.

    This function takes two mappings, one for the source IR and one for the target IR,
    and creates a new mapping that associates lines in the source IR with corresponding
    lines in the target IR based on their file, line, and column information.

    Args:
        source_map (Dict[str, Dict[str, Any]]): A dictionary mapping source IR line numbers
            to their source file, line, and column information.
        target_map (Dict[str, Dict[str, Any]]): A dictionary mapping target IR line numbers
            to their source file, line, and column information.

    Returns:
        Dict[str, List[int]]: A dictionary mapping source IR line numbers to lists of
            corresponding target IR line numbers.
    """
    source_to_target = defaultdict(list)

    # Build a mapping from source file locations to target lines
    for tgt_line, tgt_info in target_map.items():
        if "file" in tgt_info and "line" in tgt_info:
            key = f"{tgt_info['file']}:{tgt_info['line']}:{tgt_info.get('column', 0)}"
            source_to_target[key].append(int(tgt_line))

    # Map source lines to target lines
    mapping = {}
    for src_line, src_info in source_map.items():
        if "file" in src_info and "line" in src_info:
            key = f"{src_info['file']}:{src_info['line']}:{src_info.get('column', 0)}"
            if key in source_to_target:
                mapping[src_line] = sorted(source_to_target[key])

    return mapping


def create_bidirectional_mapping(
    source_map: Dict[str, Dict[str, Any]],
    target_map: Dict[str, Dict[str, Any]],
    source_type: str,
    target_type: str,
) -> None:
    """
    Create bidirectional mappings between two IR types and update their mapping dictionaries.

    This function creates mappings from source IR to target IR and vice versa, then
    updates both mapping dictionaries with the line references.

    Args:
        source_map: Dictionary mapping source IR line numbers to source locations
        target_map: Dictionary mapping target IR line numbers to source locations
        source_type: String identifier for the source IR type (e.g., 'ttir', 'ttgir', 'ptx')
        target_type: String identifier for the target IR type (e.g., 'ttir', 'ttgir', 'ptx')
    """
    # Create forward mapping (source to target)
    source_to_target = create_ir_mapping(source_map, target_map)

    # Add target line references to source mappings
    for source_line, target_lines in source_to_target.items():
        if source_line in source_map and target_lines:
            source_map[source_line][f"{target_type}_lines"] = target_lines

    # Create reverse mapping (target to source)
    target_to_source = create_ir_mapping(target_map, source_map)

    # Add source line references to target mappings
    for target_line, source_lines in target_to_source.items():
        if target_line in target_map:
            target_map[target_line][f"{source_type}_lines"] = source_lines

    logger.debug(f"Created {source_type} to {target_type} mappings (and reverse)")
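For illustration only (not part of the wheel): a minimal sketch of chaining the helpers above on two single-entry maps; the IR line numbers and file path are made up.

from tritonparse.mapper import create_bidirectional_mapping, create_python_mapping

ttir_map = {"5": {"file": "/tmp/add_kernel.py", "line": 14, "column": 24}}
ttgir_map = {"9": {"file": "/tmp/add_kernel.py", "line": 14, "column": 24}}

create_bidirectional_mapping(ttir_map, ttgir_map, "ttir", "ttgir")
# ttir_map["5"] now also carries {"ttgir_lines": [9]} and ttgir_map["9"] carries {"ttir_lines": [5]}

py_map = create_python_mapping([("ttir", ttir_map), ("ttgir", ttgir_map)])
# py_map == {14: {"ttir_lines": ["5"], "ttgir_lines": ["9"]}}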
tritonparse/reproducer/__init__.py
ADDED
@@ -0,0 +1,21 @@
"""Reproducer subpackage: generate runnable Triton repro scripts from traces.

Contains:
- ingestion.ndjson: parse NDJSON and build a context bundle
- orchestrator: LLM-based code generation with optional execute/repair
- providers: LLM provider protocol and Gemini provider
- prompts: simple prompt loader and templates
- runtime.executor: helper to run generated Python scripts
- param_generator: synthesize tensor/scalar allocations to reduce LLM burden
"""

from .ingestion.ndjson import build_context_bundle
from .orchestrator import generate_from_ndjson
from .param_generator import generate_allocation_snippet, generate_kwargs_dict

__all__ = [
    "build_context_bundle",
    "generate_from_ndjson",
    "generate_allocation_snippet",
    "generate_kwargs_dict",
]
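For illustration only (not part of the wheel): given the re-exports above, the main entry points can be imported from the subpackage root.

from tritonparse.reproducer import build_context_bundle, generate_from_ndjson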
tritonparse/reproducer/__main__.py
ADDED
@@ -0,0 +1,81 @@
import argparse
import sys


def main() -> None:
    p = argparse.ArgumentParser(
        description=(
            "Generate a runnable Triton repro script from a tritonparse NDJSON trace"
        )
    )
    p.add_argument("--ndjson", required=True, help="Path to NDJSON trace file")
    p.add_argument(
        "--launch-index",
        type=int,
        default=0,
        help="Launch index to reproduce",
    )
    p.add_argument("--out", default="repro.py", help="Output Python file path")
    p.add_argument(
        "--execute",
        action="store_true",
        help="Execute the generated script",
    )
    p.add_argument(
        "--retries",
        type=int,
        default=0,
        help="Auto-repair attempts if execution fails",
    )
    p.add_argument(
        "--temperature",
        type=float,
        help="Override sampling temperature",
    )
    p.add_argument(
        "--max-tokens",
        type=int,
        help="Override max tokens for generation",
    )
    args = p.parse_args()

    # Lazy imports to allow `--help` without optional deps installed
    from .config import load_config
    from .orchestrator import generate_from_ndjson

    try:
        from .factory import make_gemini_provider
    except Exception:  # pragma: no cover
        print(
            "Failed to import provider factory. Ensure optional deps are installed (e.g. google-genai).",
            file=sys.stderr,
        )
        raise

    cfg = load_config()
    try:
        provider = make_gemini_provider()
    except ModuleNotFoundError:  # pragma: no cover
        print(
            "Gemini provider requires 'google-genai'. Install via: pip install google-genai",
            file=sys.stderr,
        )
        sys.exit(2)
    temperature = args.temperature if args.temperature is not None else cfg.temperature
    max_tokens = args.max_tokens if args.max_tokens is not None else cfg.max_tokens

    res = generate_from_ndjson(
        args.ndjson,
        provider,
        launch_index=args.launch_index,
        out_py=args.out,
        execute=args.execute,
        retries=args.retries,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    print(res)


if __name__ == "__main__":  # pragma: no cover
    main()
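Because this module is the subpackage's __main__, the reproducer can also be run directly, e.g. python -m tritonparse.reproducer --ndjson trace.ndjson --out repro.py --execute (the trace path here is illustrative); --temperature and --max-tokens, when given, override the ReproducerConfig defaults.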
tritonparse/reproducer/cli.py
ADDED
@@ -0,0 +1,37 @@
import argparse

from .config import load_config
from .factory import make_gemini_provider
from .orchestrator import generate_from_ndjson


def add_reproducer_subparser(parser: argparse.ArgumentParser) -> None:
    sub = parser.add_subparsers(dest="subcommand")
    repro = sub.add_parser(
        "repro",
        help="Generate a runnable Triton repro script from NDJSON",
    )
    repro.add_argument("--ndjson", required=True)
    repro.add_argument("--launch-index", type=int, default=0)
    repro.add_argument("--out", default="repro.py")
    repro.add_argument("--execute", action="store_true")
    repro.add_argument("--retries", type=int, default=0)


def maybe_handle_reproducer(args: argparse.Namespace) -> bool:
    if getattr(args, "subcommand", None) != "repro":
        return False
    cfg = load_config()
    provider = make_gemini_provider()
    res = generate_from_ndjson(
        args.ndjson,
        provider,
        launch_index=args.launch_index,
        out_py=args.out,
        execute=args.execute,
        retries=args.retries,
        temperature=cfg.temperature,
        max_tokens=cfg.max_tokens,
    )
    print(res)
    return True
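For illustration only (not part of the wheel): a minimal sketch of wiring the two helpers above into a host argparse CLI. The parser name and trace path are made up, and actually dispatching the subcommand requires google-genai plus Google Cloud credentials.

import argparse

from tritonparse.reproducer.cli import add_reproducer_subparser, maybe_handle_reproducer

parser = argparse.ArgumentParser(prog="tritonparse")  # hypothetical host CLI
add_reproducer_subparser(parser)

args = parser.parse_args(["repro", "--ndjson", "trace.ndjson", "--out", "repro.py"])
if maybe_handle_reproducer(args):
    # "repro" was dispatched; this path needs google-genai and GCP credentials.
    ...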
tritonparse/reproducer/config.py
ADDED
@@ -0,0 +1,15 @@
import os
from dataclasses import dataclass


@dataclass
class ReproducerConfig:
    project: str = os.getenv("GOOGLE_CLOUD_PROJECT", "")
    location: str = os.getenv("GOOGLE_LOCATION", "us-central1")
    model: str = os.getenv("TP_REPRO_MODEL", "gemini-2.5-pro")
    temperature: float = float(os.getenv("TP_REPRO_TEMPERATURE", "0.1"))
    max_tokens: int = int(os.getenv("TP_REPRO_MAX_TOKENS", "10240"))


def load_config() -> ReproducerConfig:
    return ReproducerConfig()
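For illustration only (not part of the wheel): a minimal sketch of overriding the defaults above via environment variables. Note that the os.getenv calls are dataclass field defaults, so they are evaluated when the module is first imported; overrides therefore need to be in the environment before that import. The values below are illustrative.

import os

# Must be set before tritonparse.reproducer.config is first imported.
os.environ["TP_REPRO_MODEL"] = "gemini-2.5-pro"
os.environ["TP_REPRO_TEMPERATURE"] = "0.0"
os.environ["TP_REPRO_MAX_TOKENS"] = "4096"

from tritonparse.reproducer.config import load_config

cfg = load_config()
print(cfg.model, cfg.temperature, cfg.max_tokens)  # gemini-2.5-pro 0.0 4096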
tritonparse/reproducer/factory.py
ADDED
@@ -0,0 +1,16 @@
"""Provider factory for reproducer.

Currently supports Gemini only.
"""

from .config import load_config
from .providers.gemini import GeminiProvider


def make_gemini_provider() -> GeminiProvider:
    cfg = load_config()
    return GeminiProvider(
        project=cfg.project,
        location=cfg.location,
        model=cfg.model,
    )
tritonparse/reproducer/ingestion/ndjson.py
ADDED
@@ -0,0 +1,165 @@
import json
from typing import Any, Dict, List


def _iter_events(path: str):
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                yield json.loads(line)
            except json.JSONDecodeError:
                # skip malformed lines
                continue


def _index_compilations(events: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    idx = {}
    for e in events:
        if e.get("event_type") != "compilation":
            continue
        payload = e.get("payload") or {}
        meta = payload.get("metadata") or {}
        h = meta.get("hash")
        if h:
            idx[h] = e
    return idx


def _get_launches(events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    return [e for e in events if e.get("event_type") == "launch"]


def _resolve_kernel_source(
    launch: Dict[str, Any], comp_idx: Dict[str, Dict[str, Any]]
) -> str:
    # In the new format, launch has a top-level compilation_metadata, not payload.*
    comp_meta = (
        launch.get("compilation_metadata")
        or launch.get("payload", {}).get("compilation_metadata")
        or {}
    )
    h = comp_meta.get("hash")
    if not h:
        return ""
    comp = comp_idx.get(h, {})
    payload = comp.get("payload") or {}
    py = payload.get("python_source") or {}
    return py.get("code", "")


def _pack_args(args: Dict[str, Any]) -> Dict[str, Any]:
    packed = {}
    for k, v in args.items():
        t = v.get("type") if isinstance(v, dict) else None
        if t == "tensor":
            packed[k] = {
                "type": "tensor",
                "shape": v.get("shape") if isinstance(v, dict) else None,
                "dtype": v.get("dtype") if isinstance(v, dict) else None,
                "device": v.get("device") if isinstance(v, dict) else None,
                "stride": v.get("stride") if isinstance(v, dict) else None,
                "is_contiguous": (
                    v.get("is_contiguous") if isinstance(v, dict) else None
                ),
                "numel": v.get("numel") if isinstance(v, dict) else None,
            }
        else:
            # scalar / NoneType etc
            if isinstance(v, dict):
                packed[k] = {
                    "type": v.get("type"),
                    "value": v.get("value", v.get("repr")),
                }
            else:
                packed[k] = {
                    "type": None,
                    "value": v,
                }
    return packed


# Sentinel and helper to normalize extracted argument values
_SKIP = object()


def _decode_arg(raw: Any):
    if not isinstance(raw, dict):
        return raw
    t = raw.get("type")
    if t == "tensor":
        return _SKIP
    if t == "NoneType":
        return None
    return raw.get("value", raw.get("repr"))


def build_context_bundle(ndjson_path: str, launch_index: int = 0) -> Dict[str, Any]:
    events = list(_iter_events(ndjson_path))
    launches = _get_launches(events)
    if not launches:
        raise RuntimeError("No launch events found in NDJSON.")
    if launch_index < 0 or launch_index >= len(launches):
        raise IndexError(
            f"launch_index out of range: {launch_index} (total {len(launches)})"
        )
    launch = launches[launch_index]
    comp_idx = _index_compilations(events)
    kernel_source = _resolve_kernel_source(launch, comp_idx)
    # find '@triton.jit' and slice the string
    jit_marker = "@triton.jit"
    jit_pos = kernel_source.find(jit_marker)
    if jit_pos != -1:
        kernel_source = kernel_source[jit_pos:]

    # flatten launch fields (support both formats)
    grid = launch.get("grid") or (launch.get("payload", {})).get("grid")
    comp_meta = (
        launch.get("compilation_metadata")
        or (launch.get("payload", {})).get("compilation_metadata")
        or {}
    )
    extracted_args = (
        launch.get("extracted_args")
        or (launch.get("payload", {})).get("extracted_args")
        or {}
    )

    # compile metadata subset we care about
    compile_block = {
        "num_warps": comp_meta.get("num_warps"),
        "num_stages": comp_meta.get("num_stages"),
        "arch": comp_meta.get("arch"),
        "backend": comp_meta.get("backend_name") or comp_meta.get("backend"),
        "triton_version": comp_meta.get("triton_version"),
        "hash": comp_meta.get("hash"),
    }

    # kwargs: include constexpr + explicit scalars used for launch (skip tensor args)
    kwargs = {}
    for k, v in extracted_args.items():
        val = _decode_arg(v)
        if val is _SKIP:
            continue
        kwargs[k] = val

    # tensor args: only tensors
    tensor_args = {
        k: v
        for k, v in extracted_args.items()
        if isinstance(v, dict) and v.get("type") == "tensor"
    }

    bundle = {
        "kernel_source": kernel_source,
        "compile": compile_block,
        "launch": {
            "grid": grid,
            "kwargs": kwargs,
        },
        "args": _pack_args(extracted_args),
        "tensor_args": _pack_args(tensor_args),
    }
    return bundle
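For illustration only (not part of the wheel): a minimal sketch of building and inspecting a context bundle. The trace path is hypothetical; it is assumed to contain at least one launch event and its matching compilation event, as the code above expects.

from tritonparse.reproducer.ingestion.ndjson import build_context_bundle

bundle = build_context_bundle("trace.ndjson", launch_index=0)  # hypothetical trace file

print(bundle["compile"]["num_warps"], bundle["launch"]["grid"])
print(bundle["kernel_source"][:200])  # kernel source sliced from the @triton.jit marker
for name, meta in bundle["tensor_args"].items():
    print(name, meta["shape"], meta["dtype"], meta["device"])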