tritonparse-0.3.2.dev20251210071601-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tritonparse has been flagged as possibly problematic.

Files changed (62)
  1. tritonparse/__init__.py +0 -0
  2. tritonparse/__main__.py +7 -0
  3. tritonparse/cli.py +110 -0
  4. tritonparse/common.py +409 -0
  5. tritonparse/context_manager.py +64 -0
  6. tritonparse/event_diff.py +122 -0
  7. tritonparse/extract_source_mappings.py +49 -0
  8. tritonparse/info/__init__.py +30 -0
  9. tritonparse/info/cli.py +121 -0
  10. tritonparse/info/kernel_query.py +209 -0
  11. tritonparse/info/parse_helper.py +70 -0
  12. tritonparse/ir_analysis.py +427 -0
  13. tritonparse/ir_parser.py +365 -0
  14. tritonparse/mapper.py +102 -0
  15. tritonparse/reproducer/__init__.py +0 -0
  16. tritonparse/reproducer/ast_analyzer.py +636 -0
  17. tritonparse/reproducer/cli.py +72 -0
  18. tritonparse/reproducer/consolidated_result.py +52 -0
  19. tritonparse/reproducer/function_extractor.py +228 -0
  20. tritonparse/reproducer/import_info.py +25 -0
  21. tritonparse/reproducer/import_parser.py +178 -0
  22. tritonparse/reproducer/import_resolver.py +151 -0
  23. tritonparse/reproducer/ingestion/ndjson.py +237 -0
  24. tritonparse/reproducer/multi_file_analyzer.py +824 -0
  25. tritonparse/reproducer/orchestrator.py +110 -0
  26. tritonparse/reproducer/placeholder_replacer.py +335 -0
  27. tritonparse/reproducer/templates/__init__.py +0 -0
  28. tritonparse/reproducer/templates/example.py +38 -0
  29. tritonparse/reproducer/templates/loader.py +59 -0
  30. tritonparse/reproducer/templates/tritonbench.py +106 -0
  31. tritonparse/reproducer/templates/utils.py +48 -0
  32. tritonparse/reproducer/tests/__init__.py +0 -0
  33. tritonparse/reproducer/tests/artifacts/__init__.py +5 -0
  34. tritonparse/reproducer/tests/artifacts/triton_fused_kernel.py +65 -0
  35. tritonparse/reproducer/tests/artifacts/triton_preprocess.py +16 -0
  36. tritonparse/reproducer/tests/artifacts/triton_utils.py +14 -0
  37. tritonparse/reproducer/tests/test_import_parser.py +164 -0
  38. tritonparse/reproducer/tests/test_import_resolver.py +88 -0
  39. tritonparse/reproducer/tests/test_multi_file_analyzer.py +118 -0
  40. tritonparse/reproducer/types.py +20 -0
  41. tritonparse/reproducer/utils.py +580 -0
  42. tritonparse/shared_vars.py +12 -0
  43. tritonparse/source_type.py +56 -0
  44. tritonparse/sourcemap_utils.py +96 -0
  45. tritonparse/structured_logging.py +1634 -0
  46. tritonparse/tools/__init__.py +0 -0
  47. tritonparse/tools/decompress_bin_ndjson.py +120 -0
  48. tritonparse/tools/disasm.py +81 -0
  49. tritonparse/tools/extract_irs.py +244 -0
  50. tritonparse/tools/format_fix.py +151 -0
  51. tritonparse/tools/load_tensor.py +76 -0
  52. tritonparse/tools/prettify_ndjson.py +334 -0
  53. tritonparse/tools/readme.md +37 -0
  54. tritonparse/tp_logger.py +9 -0
  55. tritonparse/trace_processor.py +367 -0
  56. tritonparse/utils.py +155 -0
  57. tritonparse-0.3.2.dev20251210071601.dist-info/METADATA +195 -0
  58. tritonparse-0.3.2.dev20251210071601.dist-info/RECORD +62 -0
  59. tritonparse-0.3.2.dev20251210071601.dist-info/WHEEL +5 -0
  60. tritonparse-0.3.2.dev20251210071601.dist-info/entry_points.txt +2 -0
  61. tritonparse-0.3.2.dev20251210071601.dist-info/licenses/LICENSE +29 -0
  62. tritonparse-0.3.2.dev20251210071601.dist-info/top_level.txt +1 -0
tritonparse/event_diff.py
@@ -0,0 +1,122 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ import json
+ from collections import defaultdict
+ from typing import Any, Dict, List, Tuple
+
+ from .sourcemap_utils import _flatten_dict, _to_ranges, _unflatten_dict
+
+ # Fields that are expected to vary but are not useful to list out in the diff.
+ SUMMARY_FIELDS = ["pid", "timestamp", "stream", "function", "data_ptr"]
+
+
+ def _generate_launch_diff(
+     launches: List[Tuple[Dict[str, Any], int]],
+ ) -> Tuple[Dict[str, Any], Dict[str, Any], List[Dict[str, int]]]:
+     """
+     Compares a list of launch events and returns sames, diffs, and an index map.
+     """
+     if not launches:
+         return {}, {}, []
+
+     launch_events = [launch[0] for launch in launches]
+     launch_index_map = [launch[1] for launch in launches]
+
+     if len(launch_events) == 1:
+         return (
+             _unflatten_dict(_flatten_dict(launch_events[0])),
+             {},
+             _to_ranges(launch_index_map),
+         )
+
+     # Group values by key
+     data_by_key = defaultdict(lambda: defaultdict(list))
+     for i, launch in enumerate(launch_events):
+         launch_flat = _flatten_dict(launch)
+         for key, value in launch_flat.items():
+             # JSON doesn't support all Python types directly, so the serialized string is a safer grouping key
+             value_str = json.dumps(value, sort_keys=True)
+             data_by_key[key][value_str].append(i)
+
+     sames_flat = {}
+     diffs_flat = {}
+
+     for key, value_groups in data_by_key.items():
+         if len(value_groups) == 1:
+             # This key has the same value across all launches
+             value_str = list(value_groups.keys())[0]
+             sames_flat[key] = json.loads(value_str)
+         else:
+             # This key has different values
+             is_summary = any(summary_key in key for summary_key in SUMMARY_FIELDS)
+             if is_summary:
+                 diffs_flat[key] = {
+                     "diff_type": "summary",
+                     "summary_text": f"Varies across {len(value_groups)} unique values",
+                 }
+             else:
+                 values_dist = []
+                 for value_str, indices in value_groups.items():
+                     values_dist.append(
+                         {
+                             "value": json.loads(value_str),
+                             "count": len(indices),
+                             "launches": _to_ranges(indices),
+                         }
+                     )
+                 # Sort by first occurrence
+                 values_dist.sort(key=lambda x: x["launches"][0]["start"])
+                 diffs_flat[key] = {
+                     "diff_type": "distribution",
+                     "values": values_dist,
+                 }
+
+     # Unflatten the results
+     sames_unflattened = _unflatten_dict(sames_flat)
+     diffs_unflattened = _unflatten_dict(diffs_flat)
+
+     # Special handling for extracted_args to create argument_diff structures
+     if "extracted_args" in sames_unflattened or "extracted_args" in diffs_unflattened:
+         sames_args = sames_unflattened.pop("extracted_args", {})
+         diffs_args_flat = diffs_unflattened.pop("extracted_args", {})
+
+         all_arg_names = set(sames_args.keys()) | set(diffs_args_flat.keys())
+
+         final_arg_diffs = {}
+
+         for arg_name in all_arg_names:
+             if arg_name in diffs_args_flat:
+                 # This argument has at least one differing sub-field.
+                 arg_sames = {}
+                 arg_diffs_internal = {}
+
+                 # Collect all sub-fields for this argument from the original data
+                 all_sub_fields = set()
+                 for launch in launch_events:
+                     arg_data = launch.get("extracted_args", {}).get(arg_name, {})
+                     all_sub_fields.update(arg_data.keys())
+
+                 for sub_field in all_sub_fields:
+                     flat_key = f"extracted_args.{arg_name}.{sub_field}"
+                     if flat_key in diffs_flat:
+                         arg_diffs_internal[sub_field] = diffs_flat[flat_key]
+                     elif flat_key in sames_flat:
+                         arg_sames[sub_field] = sames_flat[flat_key]
+
+                 if arg_sames or arg_diffs_internal:
+                     final_arg_diffs[arg_name] = {
+                         "diff_type": "argument_diff",
+                         "sames": arg_sames,
+                         "diffs": arg_diffs_internal,
+                     }
+             elif arg_name in sames_args:
+                 # This argument is entirely the same across all launches.
+                 # We move it back to the main sames dict for consistency.
+                 if "extracted_args" not in sames_unflattened:
+                     sames_unflattened["extracted_args"] = {}
+                 sames_unflattened["extracted_args"][arg_name] = sames_args[arg_name]
+
+         if final_arg_diffs:
+             diffs_unflattened["extracted_args"] = final_arg_diffs
+
+     return sames_unflattened, diffs_unflattened, _to_ranges(launch_index_map)
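To make the sames/diffs split concrete, here is a minimal, self-contained sketch of the grouping step above. The local flatten helper is a hypothetical stand-in for the package's private _flatten_dict (and the sketch skips _to_ranges entirely), so it illustrates only the idea, not the exact output format:

import json
from collections import defaultdict

def flatten(d, prefix=""):
    # Flatten nested dicts into dotted keys: {"a": {"b": 1}} -> {"a.b": 1}.
    out = {}
    for k, v in d.items():
        key = f"{prefix}.{k}" if prefix else k
        if isinstance(v, dict):
            out.update(flatten(v, key))
        else:
            out[key] = v
    return out

# Two hypothetical launch events that agree on everything except "stream".
launches = [
    {"grid": [128, 1, 1], "stream": 7, "extracted_args": {"x": {"dtype": "fp16"}}},
    {"grid": [128, 1, 1], "stream": 9, "extracted_args": {"x": {"dtype": "fp16"}}},
]

# Same grouping idea as _generate_launch_diff: bucket launch indices by
# the JSON-serialized value of each flattened key.
data_by_key = defaultdict(lambda: defaultdict(list))
for i, launch in enumerate(launches):
    for key, value in flatten(launch).items():
        data_by_key[key][json.dumps(value, sort_keys=True)].append(i)

for key, groups in sorted(data_by_key.items()):
    status = "same" if len(groups) == 1 else f"differs across {len(groups)} values"
    print(f"{key}: {status}")
# extracted_args.x.dtype: same
# grid: same
# stream: differs across 2 values

In the real function, "stream" would be collapsed to a "summary" entry rather than a full value distribution, because it appears in SUMMARY_FIELDS.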
tritonparse/extract_source_mappings.py
@@ -0,0 +1,49 @@
+ #!/usr/bin/env python3
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ Extract source code mappings from Triton trace files and update the original JSON.
+ This script reads a JSON trace file containing Triton IR (TTIR, TTGIR) and PTX (AMDGCN on AMD GPUs),
+ and extracts bidirectional mappings between:
+ - Python ↔ TTIR
+ - Python ↔ TTGIR
+ - Python ↔ PTX (AMDGCN)
+ - TTIR ↔ TTGIR
+ - TTIR ↔ PTX (AMDGCN)
+ - TTGIR ↔ PTX (AMDGCN)
+ """
+
+ import argparse
+ import logging
+
+ from .trace_processor import parse_single_file
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("SourceMapping")
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description="Extract source code mappings from Triton trace files."
+     )
+     parser.add_argument("-i", "--input", help="Path to the Triton trace NDJSON file")
+     parser.add_argument(
+         "--output-dir",
+         default=None,
+         help="Directory to save the output files. If not specified, the input file's directory will be used.",
+     )
+     parser.add_argument(
+         "-o",
+         "--output",
+         default=None,
+         help="Output NDJSON path. If it is None, the default output file name will be set to {input}_mapped.ndjson in the parse function.",
+     )
+     return parser.parse_args()
+
+
+ if __name__ == "__main__":
+     args = parse_args()
+     if args.input:
+         parse_single_file(args.input, args.output_dir)
+     else:
+         logger.error("No input file specified.")
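The same parsing can also be driven programmatically, mirroring what the __main__ block above does; "trace.ndjson" and "parsed_out" are hypothetical example paths:

# Roughly equivalent to: python -m tritonparse.extract_source_mappings -i trace.ndjson
# (assuming the package is installed); the paths below are hypothetical examples.
from tritonparse.trace_processor import parse_single_file

parse_single_file("trace.ndjson", "parsed_out")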
tritonparse/info/__init__.py
@@ -0,0 +1,30 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ Info module for querying kernel information from NDJSON trace files.
+
+ This module provides core query functions for kernel information:
+ - Listing all kernels with their launch counts
+ - Finding launch events by kernel name and launch ID
+ - Querying launch information for specific kernels
+ """
+
+ from tritonparse.info.kernel_query import (
+     find_launch_index_by_kernel,
+     find_similar_kernels,
+     KernelSummary,
+     LaunchInfo,
+     list_kernels,
+     list_kernels_fast,
+     list_launches_for_kernel,
+ )
+
+ __all__ = [
+     "KernelSummary",
+     "LaunchInfo",
+     "list_kernels",
+     "list_kernels_fast",
+     "list_launches_for_kernel",
+     "find_launch_index_by_kernel",
+     "find_similar_kernels",
+ ]
tritonparse/info/cli.py
@@ -0,0 +1,121 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ CLI implementation for the info subcommand.
+
+ This module provides the command-line interface for querying kernel information
+ from NDJSON trace files.
+ """
+
+ import argparse
+ import tempfile
+ from typing import Optional
+
+ from tritonparse.info.kernel_query import (
+     find_similar_kernels,
+     list_kernels_fast,
+     list_launches_for_kernel,
+ )
+ from tritonparse.info.parse_helper import parse_and_compress_raw_log
+ from tritonparse.tools.prettify_ndjson import load_ndjson
+
+
+ def _add_info_args(parser: argparse.ArgumentParser) -> None:
+     """Add arguments for the info subcommand."""
+     parser.add_argument(
+         "input",
+         help="Path to ndjson/ndjson.gz/.bin.ndjson file",
+     )
+     parser.add_argument(
+         "--kernel",
+         type=str,
+         default=None,
+         help="Kernel name to list launches for",
+     )
+
+
+ def info_command(input_path: str, kernel_name: Optional[str] = None) -> None:
+     """
+     Main function for the info command.
+
+     Args:
+         input_path: Path to ndjson file
+         kernel_name: Optional kernel name to list launches for
+     """
+     # 1. Load and detect type
+     events = load_ndjson(input_path)
+     has_launch_diff = any(e.get("event_type") == "launch_diff" for e in events)
+
+     # 2. If no launch_diff, auto-parse
+     if not has_launch_diff:
+         print(
+             f"Input file '{input_path}' appears to be a raw log (no launch_diff events)."
+         )
+         print("Parsing automatically to generate launch_diff events...")
+
+         temp_dir = tempfile.mkdtemp(prefix="tritonparse_info_")
+
+         try:
+             # Parse and compress (reuses the parse module's functions)
+             parsed_file = parse_and_compress_raw_log(
+                 input_path,
+                 output_dir=temp_dir,
+                 split_inductor_compilations=False,
+                 verbose=False,
+             )
+
+             # Load compressed file (load_ndjson supports .ndjson.gz)
+             events = load_ndjson(parsed_file)
+
+             print(f"✓ Parsed and compressed file: {parsed_file}")
+             print(f" (Temporary directory: {temp_dir})")
+         except Exception as e:
+             raise RuntimeError(f"Failed to parse input file '{input_path}': {e}") from e
+     else:
+         print(f"Using parsed trace file: {input_path}")
+
+     # 3. Process query
+     if kernel_name:
+         # List launches for the specified kernel
+         try:
+             launches = list_launches_for_kernel(events, kernel_name)
+             print(f"\nLaunches for '{kernel_name}':")
+             print("-" * 60)
+             for launch in launches:
+                 grid_str = str(launch.grid) if launch.grid else "N/A"
+                 print(
+                     f" id={launch.launch_id:3d} line {launch.line_index:5d} grid={grid_str}"
+                 )
+         except ValueError as e:
+             error_msg = str(e)
+             print(f"\nError: {error_msg}")
+             # Try to suggest similar kernels
+             try:
+                 similar = find_similar_kernels(events, kernel_name, n=3)
+                 if similar:
+                     print("\nDid you mean one of these?")
+                     all_kernels = list_kernels_fast(
+                         events
+                     )  # Use fast path for consistency
+                     kernel_dict = {k.name: k for k in all_kernels}
+                     for name in similar:
+                         count = kernel_dict[name].total_launches
+                         print(f" - {name} ({count} launches)")
+                     print("\nUse 'tritonparseoss info <file>' to list all kernels.")
+             except Exception:
+                 pass  # Ignore errors in suggestion
+             raise
+     else:
+         # List all kernels
+         kernels = list_kernels_fast(events)
+         print(f"\nKernels in {input_path}:")
+         print("-" * 60)
+         for kernel in kernels:
+             if kernel.total_launches > 0:
+                 max_id = kernel.total_launches - 1
+                 print(
+                     f" {kernel.name:30s} {kernel.total_launches:3d} launches "
+                     f"(id: 0-{max_id})"
+                 )
+             else:
+                 print(f" {kernel.name:30s} {kernel.total_launches:3d} launches")
tritonparse/info/kernel_query.py
@@ -0,0 +1,209 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ Core query functions for kernel information from NDJSON trace files.
+
+ This module provides functions to query kernel launch information from parsed
+ event lists. It supports both raw log files and parsed ndjson files (with launch_diff events).
+ """
+
+ import difflib
+ from collections import defaultdict
+ from dataclasses import dataclass
+ from typing import Any, Dict, List
+
+
+ @dataclass
+ class KernelSummary:
+     """Summary information about a kernel."""
+
+     name: str
+     hash: str
+     total_launches: int
+
+
+ @dataclass
+ class LaunchInfo:
+     """Information about a specific kernel launch."""
+
+     launch_id: int  # 0-based
+     line_index: int  # 0-based (index in events list)
+     grid: List[int]
+
+
+ def list_kernels(events: List[Dict[str, Any]]) -> List[KernelSummary]:
+     """
+     List all kernels with their launch counts.
+
+     Args:
+         events: List of parsed event dictionaries from NDJSON file
+
+     Returns:
+         List of KernelSummary objects, sorted by kernel name
+     """
+     # Count launches per kernel
+     kernel_counts: Dict[str, Dict[str, Any]] = defaultdict(
+         lambda: {"hash": "", "count": 0}
+     )
+
+     for event in events:
+         if event.get("event_type") != "launch":
+             continue
+
+         comp_meta = event.get("compilation_metadata", {})
+         kernel_name = comp_meta.get("name")
+         kernel_hash = comp_meta.get("hash", "")
+
+         if kernel_name:
+             kernel_counts[kernel_name]["hash"] = kernel_hash
+             kernel_counts[kernel_name]["count"] += 1
+
+     # Convert to KernelSummary list
+     summaries = [
+         KernelSummary(name=name, hash=info["hash"], total_launches=info["count"])
+         for name, info in kernel_counts.items()
+     ]
+
+     # Sort by kernel name for consistent output
+     summaries.sort(key=lambda x: x.name)
+
+     return summaries
+
+
+ def find_launch_index_by_kernel(
+     events: List[Dict[str, Any]], kernel_name: str, launch_id: int
+ ) -> int:
+     """
+     Find the 0-based line index for a kernel's N-th launch.
+
+     Args:
+         events: List of parsed event dictionaries
+         kernel_name: Exact kernel name to match (case-sensitive)
+         launch_id: 0-based launch index for the kernel
+
+     Returns:
+         0-based line index (index in events list)
+
+     Raises:
+         ValueError: If kernel not found or launch_id out of range
+     """
+     count = 0
+     for i, event in enumerate(events):
+         if event.get("event_type") != "launch":
+             continue
+
+         comp_meta = event.get("compilation_metadata", {})
+         name = comp_meta.get("name")
+         if name == kernel_name:
+             if count == launch_id:
+                 return i
+             count += 1
+
+     if count == 0:
+         raise ValueError(f"Kernel '{kernel_name}' not found")
+     else:
+         raise ValueError(
+             f"Kernel '{kernel_name}' has only {count} launches, "
+             f"but --launch-id {launch_id} was requested. Valid range: 0 to {count - 1}"
+         )
+
+
+ def list_launches_for_kernel(
+     events: List[Dict[str, Any]], kernel_name: str
+ ) -> List[LaunchInfo]:
+     """
+     List all launches for a specific kernel.
+
+     Args:
+         events: List of parsed event dictionaries
+         kernel_name: Exact kernel name to match (case-sensitive)
+
+     Returns:
+         List of LaunchInfo objects for the kernel, sorted by launch_id
+
+     Raises:
+         ValueError: If kernel not found
+     """
+     launches = []
+     launch_id = 0
+
+     for i, event in enumerate(events):
+         if event.get("event_type") != "launch":
+             continue
+
+         comp_meta = event.get("compilation_metadata", {})
+         name = comp_meta.get("name")
+         if name == kernel_name:
+             # Extract grid information from launch event
+             grid = event.get("grid", [])
+             launches.append(LaunchInfo(launch_id=launch_id, line_index=i, grid=grid))
+             launch_id += 1
+
+     if not launches:
+         raise ValueError(f"Kernel '{kernel_name}' not found")
+
+     return launches
+
+
+ def find_similar_kernels(
+     events: List[Dict[str, Any]], kernel_name: str, n: int = 3
+ ) -> List[str]:
+     """
+     Find similar kernel names using fuzzy matching.
+
+     Args:
+         events: List of parsed event dictionaries
+         kernel_name: Kernel name to find similar matches for
+         n: Maximum number of matches to return
+
+     Returns:
+         List of similar kernel names (may be empty if no matches found)
+     """
+     all_kernels = list_kernels(events)
+     all_names = [k.name for k in all_kernels]
+     return difflib.get_close_matches(kernel_name, all_names, n=n, cutoff=0.6)
+
+
+ def list_kernels_fast(events: List[Dict[str, Any]]) -> List[KernelSummary]:
+     """
+     Fast kernel listing using launch_diff events when available.
+
+     If launch_diff events are present, uses them for fast listing.
+     Otherwise, falls back to list_kernels().
+
+     Args:
+         events: List of parsed event dictionaries
+
+     Returns:
+         List of KernelSummary objects, sorted by kernel name
+     """
+     # Check if launch_diff events are available
+     launch_diff_events = [e for e in events if e.get("event_type") == "launch_diff"]
+
+     if launch_diff_events:
+         # Use launch_diff events for fast listing
+         # Merge kernels with the same name (sum up launches)
+         kernel_dict: Dict[str, KernelSummary] = {}
+         for event in launch_diff_events:
+             name = event.get("name", "")
+             if not name:
+                 continue
+             hash_val = event.get("hash", "")
+             launches = event.get("total_launches", 0)
+
+             if name in kernel_dict:
+                 # Merge: sum up launches, keep first hash
+                 kernel_dict[name].total_launches += launches
+             else:
+                 kernel_dict[name] = KernelSummary(
+                     name=name,
+                     hash=hash_val,
+                     total_launches=launches,
+                 )
+
+         summaries = list(kernel_dict.values())
+         summaries.sort(key=lambda x: x.name)
+         return summaries
+     else:
+         # Fall back to full traversal
+         return list_kernels(events)
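Because these functions take plain event dictionaries, they can be exercised without a trace file. A minimal sketch with synthetic events that carry only the fields the queries read; the kernel name and hash are hypothetical, and real trace events have many more fields:

from tritonparse.info.kernel_query import find_launch_index_by_kernel, list_kernels

# Synthetic events shaped like the fields read above (hypothetical values).
events = [
    {"event_type": "compilation"},
    {
        "event_type": "launch",
        "compilation_metadata": {"name": "add_kernel", "hash": "abc123"},
    },
    {
        "event_type": "launch",
        "compilation_metadata": {"name": "add_kernel", "hash": "abc123"},
    },
]

for k in list_kernels(events):
    print(k.name, k.total_launches)  # add_kernel 2

# The second launch (0-based id 1) of add_kernel sits at events index 2.
print(find_launch_index_by_kernel(events, "add_kernel", launch_id=1))  # 2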
tritonparse/info/parse_helper.py
@@ -0,0 +1,70 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+
+ """
+ Helper functions for parsing raw log files in the info module.
+
+ This module provides utilities to parse and compress raw log files,
+ reusing functionality from the parse module.
+ """
+
+ from pathlib import Path
+
+ from tritonparse.common import gzip_single_file
+ from tritonparse.trace_processor import parse_single_file
+
+
+ def parse_and_compress_raw_log(
+     input_path: str,
+     output_dir: str,
+     split_inductor_compilations: bool = False,
+     verbose: bool = False,
+ ) -> Path:
+     """
+     Parse a raw log file, compress it, and return the path to the compressed parsed file.
+
+     This function reuses the parse module's functionality:
+     - parse_single_file: Parse the file
+     - gzip_single_file: Compress the parsed file
+
+     Args:
+         input_path: Path to raw log file
+         output_dir: Directory to save parsed file
+         split_inductor_compilations: Whether to split by inductor compilations
+         verbose: Whether to print verbose information
+
+     Returns:
+         Path to the generated compressed parsed file (.ndjson.gz)
+
+     Raises:
+         RuntimeError: If parsing fails or parsed file not found
+     """
+     # 1. Parse the file (generates uncompressed .ndjson)
+     parse_single_file(
+         input_path,
+         output_dir=output_dir,
+         split_inductor_compilations=split_inductor_compilations,
+     )
+
+     # 2. Calculate the expected output filename
+     input_path_obj = Path(input_path)
+     file_name = input_path_obj.name
+
+     if input_path.endswith(".bin.ndjson"):
+         file_name_without_ext = file_name[:-11]  # Remove ".bin.ndjson"
+     else:
+         file_name_without_ext = input_path_obj.stem  # Remove the last extension
+         # If there's still a .ndjson extension, remove it (e.g. for .ndjson.gz inputs)
+         if file_name_without_ext.endswith(".ndjson"):
+             file_name_without_ext = file_name_without_ext[:-7]
+
+     uncompressed_file = Path(output_dir) / f"{file_name_without_ext}_mapped.ndjson"
+
+     if not uncompressed_file.exists():
+         raise RuntimeError(
+             f"Failed to generate parsed file. Expected: {uncompressed_file}"
+         )
+
+     # 3. Compress the file (reusing the parse module's function)
+     compressed_file = gzip_single_file(str(uncompressed_file), verbose=verbose)
+
+     return Path(compressed_file)  # Returns the .ndjson.gz path
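The filename derivation above is the subtle part for multi-suffix inputs; this standalone sketch reproduces just that logic for a few hypothetical input names:

from pathlib import Path

def mapped_name(input_path: str) -> str:
    # Mirrors the stem computation in parse_and_compress_raw_log above.
    name = Path(input_path).name
    if input_path.endswith(".bin.ndjson"):
        stem = name[:-11]  # strip ".bin.ndjson"
    else:
        stem = Path(input_path).stem  # strips only the last suffix
        if stem.endswith(".ndjson"):
            stem = stem[:-7]  # strip a remaining ".ndjson" (e.g. ".ndjson.gz" inputs)
    return f"{stem}_mapped.ndjson"

print(mapped_name("trace.bin.ndjson"))  # trace_mapped.ndjson
print(mapped_name("trace.ndjson.gz"))   # trace_mapped.ndjson
print(mapped_name("trace.ndjson"))      # trace_mapped.ndjson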