tritonparse 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of tritonparse might be problematic.
- tritonparse/__init__.py +0 -0
- tritonparse/common.py +409 -0
- tritonparse/event_diff.py +120 -0
- tritonparse/extract_source_mappings.py +49 -0
- tritonparse/ir_parser.py +220 -0
- tritonparse/mapper.py +100 -0
- tritonparse/reproducer/__init__.py +21 -0
- tritonparse/reproducer/__main__.py +81 -0
- tritonparse/reproducer/cli.py +37 -0
- tritonparse/reproducer/config.py +15 -0
- tritonparse/reproducer/factory.py +16 -0
- tritonparse/reproducer/ingestion/__init__.py +6 -0
- tritonparse/reproducer/ingestion/ndjson.py +165 -0
- tritonparse/reproducer/orchestrator.py +65 -0
- tritonparse/reproducer/param_generator.py +142 -0
- tritonparse/reproducer/prompts/__init__.py +1 -0
- tritonparse/reproducer/prompts/loader.py +18 -0
- tritonparse/reproducer/providers/__init__.py +1 -0
- tritonparse/reproducer/providers/base.py +14 -0
- tritonparse/reproducer/providers/gemini.py +47 -0
- tritonparse/reproducer/runtime/__init__.py +1 -0
- tritonparse/reproducer/runtime/executor.py +13 -0
- tritonparse/reproducer/utils/io.py +6 -0
- tritonparse/shared_vars.py +9 -0
- tritonparse/source_type.py +56 -0
- tritonparse/sourcemap_utils.py +72 -0
- tritonparse/structured_logging.py +1046 -0
- tritonparse/tools/__init__.py +0 -0
- tritonparse/tools/decompress_bin_ndjson.py +118 -0
- tritonparse/tools/format_fix.py +149 -0
- tritonparse/tools/load_tensor.py +58 -0
- tritonparse/tools/prettify_ndjson.py +315 -0
- tritonparse/tp_logger.py +9 -0
- tritonparse/trace_processor.py +331 -0
- tritonparse/utils.py +156 -0
- tritonparse-0.1.1.dist-info/METADATA +10 -0
- tritonparse-0.1.1.dist-info/RECORD +40 -0
- tritonparse-0.1.1.dist-info/WHEEL +5 -0
- tritonparse-0.1.1.dist-info/licenses/LICENSE +29 -0
- tritonparse-0.1.1.dist-info/top_level.txt +1 -0
tritonparse/tools/__init__.py
File without changes

tritonparse/tools/decompress_bin_ndjson.py
ADDED
@@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""
Script to decompress .bin.ndjson files back to regular .ndjson format.

The .bin.ndjson format stores each JSON record as a separate gzip member,
concatenated in sequence within a single binary file. This script uses
gzip.open() which automatically handles member concatenation to read
the compressed file and write out the original NDJSON format.

Usage:
    python decompress_bin_ndjson.py trace.bin.ndjson
"""

import argparse
import gzip
import sys
from pathlib import Path


def decompress_bin_ndjson(input_file: str, output_file: str = None) -> None:
    """
    Decompress a .bin.ndjson file to regular .ndjson format.

    Args:
        input_file: Path to the .bin.ndjson file
        output_file: Path for the output .ndjson file (optional)
    """
    input_path = Path(input_file)

    # Validate input file
    if not input_path.exists():
        print(f"Error: Input file '{input_file}' does not exist", file=sys.stderr)
        return

    # Check the full name: Path.suffix would only be ".ndjson" for a
    # double extension like ".bin.ndjson".
    if not input_path.name.endswith(".bin.ndjson"):
        print(f"Warning: Input file '{input_file}' doesn't have .bin.ndjson extension")

    # Determine output file path
    if output_file is None:
        if input_path.name.endswith(".bin.ndjson"):
            # Replace .bin.ndjson with .ndjson
            output_file = str(input_path.with_suffix("").with_suffix(".ndjson"))
        else:
            # Add .decompressed.ndjson suffix
            output_file = str(input_path.with_suffix(".decompressed.ndjson"))

    output_path = Path(output_file)

    try:
        line_count = 0
        # Because we use NDJSON format, each line is a complete JSON record.
        # It is guaranteed here https://github.com/meta-pytorch/tritonparse/blob/
        # c8dcc2a94ac10ede4342dba7456f6ebd8409b95d/tritonparse/structured_logging.py#L320
        with gzip.open(input_path, "rt", encoding="utf-8") as compressed_file:
            with open(output_path, "w", encoding="utf-8") as output:
                for line in compressed_file:
                    # gzip.open automatically handles member concatenation
                    # Each line is already a complete JSON record with newline
                    output.write(line)
                    line_count += 1

        # Get file sizes for comparison
        input_size = input_path.stat().st_size
        output_size = output_path.stat().st_size
        compression_ratio = (
            (1 - input_size / output_size) * 100 if output_size > 0 else 0
        )

        print(f"Successfully decompressed '{input_file}' to '{output_file}'")
        print(f"  Input size: {input_size:,} bytes")
        print(f"  Output size: {output_size:,} bytes")
        print(f"  Compression ratio: {compression_ratio:.1f}%")
        print(f"  Records processed: {line_count:,}")

    except gzip.BadGzipFile as e:
        print(f"Error: Invalid gzip format in '{input_file}': {e}", file=sys.stderr)
    except UnicodeDecodeError as e:
        print(f"Error: Unicode decode error in '{input_file}': {e}", file=sys.stderr)
    except Exception as e:
        print(f"Error: Failed to decompress '{input_file}': {e}", file=sys.stderr)


def main():
    parser = argparse.ArgumentParser(
        description="Decompress .bin.ndjson files to regular .ndjson format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s trace.bin.ndjson
  %(prog)s trace.bin.ndjson -o output.ndjson
  %(prog)s /logs/dedicated_log_triton_trace_user_.bin.ndjson
        """,
    )

    parser.add_argument("input_file", help="Input .bin.ndjson file to decompress")

    parser.add_argument(
        "-o",
        "--output",
        help="Output .ndjson file path (default: replace .bin.ndjson with .ndjson)",
    )

    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose output"
    )

    args = parser.parse_args()

    if args.verbose:
        print(f"Decompressing: {args.input_file}")
        if args.output:
            print(f"Output file: {args.output}")

    decompress_bin_ndjson(args.input_file, args.output)


if __name__ == "__main__":
    main()
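For context, here is a minimal sketch of the write side this tool inverts: each NDJSON record is compressed as its own complete gzip member, and the members are concatenated into one file, which gzip.open() above transparently re-reads as a single stream. The records and file name are illustrative only; tritonparse's actual writer lives in the structured_logging.py module linked in the comments above.

import gzip
import json

# Hypothetical records standing in for real trace events.
records = [{"event_type": "compilation", "id": i} for i in range(3)]

with open("trace.bin.ndjson", "wb") as f:
    for rec in records:
        line = (json.dumps(rec) + "\n").encode("utf-8")
        f.write(gzip.compress(line))  # each call emits one complete gzip member

# gzip.open() reads the concatenated members back as one text stream:
with gzip.open("trace.bin.ndjson", "rt", encoding="utf-8") as f:
    assert sum(1 for _ in f) == len(records)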
tritonparse/tools/format_fix.py
ADDED
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""
Format fix script for tritonparse project.

This script runs all linter tools to format and fix code issues:
- usort: Import sorting
- ruff: Linting only
- black: Code formatting

Usage:
    python -m tritonparse.tools.format_fix [options]

Options:
    --check-only    Only check for issues, don't fix them
    --verbose       Verbose output
    --help          Show this help message
"""

import argparse
import subprocess
import sys


def run_command(cmd: list[str], verbose: bool = False) -> bool:
    """Run a command and return success status."""
    if verbose:
        print(f"Running: {' '.join(cmd)}")

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=False)

        if result.returncode != 0:
            if verbose:
                print(f"Command failed with return code {result.returncode}")
            if result.stdout:
                print("STDOUT:", result.stdout)
            if result.stderr:
                print("STDERR:", result.stderr)
            return False

        if verbose and result.stdout:
            print(result.stdout)

        return True
    except Exception as e:
        if verbose:
            print(f"Error running command: {e}")
        return False


def run_usort(check_only: bool = False, verbose: bool = False) -> bool:
    """Run usort for import sorting."""
    cmd = ["usort"]

    if check_only:
        cmd.extend(["check", "."])
    else:
        cmd.extend(["format", "."])

    return run_command(cmd, verbose)


def run_ruff_check(check_only: bool = False, verbose: bool = False) -> bool:
    """Run ruff for linting only."""
    cmd = ["ruff", "check", "."]

    if check_only:
        cmd.append("--diff")
    else:
        cmd.append("--fix")

    return run_command(cmd, verbose)


def run_black(check_only: bool = False, verbose: bool = False) -> bool:
    """Run black for code formatting."""
    cmd = ["black"]

    if check_only:
        cmd.extend(["--check", "--diff", "."])
    else:
        cmd.append(".")

    return run_command(cmd, verbose)


def main():
    """Main function."""
    parser = argparse.ArgumentParser(
        description="Format fix script for tritonparse project",
        epilog="""
Examples:
  # Fix all formatting issues
  python -m tritonparse.tools.format_fix

  # Check for issues without fixing
  python -m tritonparse.tools.format_fix --check-only

  # Verbose output
  python -m tritonparse.tools.format_fix --verbose
        """,
    )

    parser.add_argument(
        "--check-only",
        action="store_true",
        help="Only check for issues, don't fix them",
    )
    parser.add_argument("--verbose", action="store_true", help="Verbose output")

    args = parser.parse_args()

    # Run formatters on the entire project
    success = True

    # 1. Run usort for import sorting
    print("Running usort for import sorting...")
    if not run_usort(args.check_only, args.verbose):
        print("❌ usort failed")
        success = False
    else:
        print("✅ usort completed")

    # 2. Run ruff for linting only
    print("Running ruff for linting...")
    if not run_ruff_check(args.check_only, args.verbose):
        print("❌ ruff linting failed")
        success = False
    else:
        print("✅ ruff linting completed")

    # 3. Run black for code formatting
    print("Running black for code formatting...")
    if not run_black(args.check_only, args.verbose):
        print("❌ black failed")
        success = False
    else:
        print("✅ black completed")

    if success:
        print("\n🎉 All formatting tools completed successfully!")
        return 0
    else:
        print("\n❌ Some formatting tools failed. Please check the output above.")
        return 1


if __name__ == "__main__":
    sys.exit(main())
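A sketch of driving these helpers programmatically rather than through the CLI; run_usort, run_ruff_check, and run_black are the functions defined above, and the import path assumes the installed package layout.

from tritonparse.tools.format_fix import run_black, run_ruff_check, run_usort

# check_only=True maps to: `usort check .`, `ruff check . --diff`,
# and `black --check --diff .` respectively.
results = [
    fn(check_only=True, verbose=True)
    for fn in (run_usort, run_ruff_check, run_black)
]
print("clean" if all(results) else "needs formatting")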
tritonparse/tools/load_tensor.py
ADDED
@@ -0,0 +1,58 @@
#!/usr/bin/env python3
"""
Simple tensor loading utility for tritonparse saved tensors.

Usage:
    import tritonparse.tools.load_tensor as load_tensor
    tensor = load_tensor.load_tensor(tensor_file_path, device)
"""

import hashlib
from pathlib import Path

import torch


def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
    """
    Load a tensor from its file path and verify its integrity using the hash in the filename.

    Args:
        tensor_file_path (str): Direct path to the tensor .bin file. The filename should be
            the hash of the file contents followed by the .bin extension.
        device (str, optional): Device to load the tensor to (e.g., 'cuda:0', 'cpu').
            If None, keeps the tensor on its original device.

    Returns:
        torch.Tensor: The loaded tensor (moved to the specified device if provided)

    Raises:
        FileNotFoundError: If the tensor file doesn't exist
        RuntimeError: If the tensor cannot be loaded
        ValueError: If the computed hash doesn't match the filename hash
    """
    blob_path = Path(tensor_file_path)

    if not blob_path.exists():
        raise FileNotFoundError(f"Tensor blob not found: {blob_path}")

    # Extract expected hash from filename (remove .bin extension)
    expected_hash = blob_path.stem

    # Compute actual hash of file contents
    with open(blob_path, "rb") as f:
        file_contents = f.read()
    computed_hash = hashlib.blake2b(file_contents).hexdigest()

    # Verify hash matches filename
    if computed_hash != expected_hash:
        raise ValueError(
            f"Hash verification failed: expected '{expected_hash}' but computed '{computed_hash}'"
        )

    try:
        # Load the tensor using torch.load (tensors are saved with torch.save)
        # If device is None, keep tensor on its original device, otherwise move to specified device
        tensor = torch.load(blob_path, map_location=device)
        return tensor
    except Exception as e:
        raise RuntimeError(f"Failed to load tensor from {blob_path}: {str(e)}") from e
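For illustration, a hypothetical save-side helper that produces the naming convention load_tensor() verifies: the file name is the blake2b hex digest of the serialized bytes plus a .bin extension. tritonparse's actual writer may differ; this only mirrors the check above.

import hashlib
from pathlib import Path

import torch

from tritonparse.tools.load_tensor import load_tensor


def save_tensor(tensor: torch.Tensor, out_dir: str) -> Path:
    """Hypothetical writer matching load_tensor()'s hash-in-filename check."""
    out = Path(out_dir)
    tmp = out / "tensor.tmp"
    torch.save(tensor, tmp)
    digest = hashlib.blake2b(tmp.read_bytes()).hexdigest()
    final = out / f"{digest}.bin"
    tmp.rename(final)
    return final


path = save_tensor(torch.randn(4), "/tmp")
restored = load_tensor(str(path), device="cpu")  # hash check passes by construction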
tritonparse/tools/prettify_ndjson.py
ADDED
@@ -0,0 +1,315 @@
#!/usr/bin/env python3
"""
Convert an NDJSON file to a prettified JSON file.

This script takes an NDJSON (newline-delimited JSON) file and converts it to a
standard human-readable JSON file where each line becomes an element in a JSON
array, with pretty formatting applied.

Example:
    Input NDJSON file (data.ndjson):
        {"name": "Alice", "age": 30}
        {"name": "Bob", "age": 25}
        {"name": "Charlie", "age": 35}

    Output JSON file (data_prettified.json):
        [
          {
            "age": 30,
            "name": "Alice"
          },
          {
            "age": 25,
            "name": "Bob"
          },
          {
            "age": 35,
            "name": "Charlie"
          }
        ]

Usage:
    python prettify_ndjson.py data.ndjson
    python prettify_ndjson.py --lines 1,3 data.ndjson   # Only process lines 1 and 3
    python prettify_ndjson.py --save-irs logs.ndjson    # Keep all fields for compilation events
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Any, List


def parse_line_ranges(lines_arg: str) -> set[int]:
    """
    Parse line ranges from a string like "1,2,3,5-10" into a set of line numbers.

    Line numbers use 1-based indexing (first line is line 1, not 0).

    Args:
        lines_arg: String containing comma-separated line numbers and ranges
            Examples: "1", "1,2,3", "5-10", "1,3,5-10,15"

    Returns:
        Set of line numbers (1-based indexing, where 1 = first line)

    Raises:
        ValueError: If the format is invalid or contains non-positive numbers
    """
    line_numbers = set()

    if not lines_arg.strip():
        return line_numbers

    parts = lines_arg.split(",")
    for part in parts:
        part = part.strip()
        if not part:
            continue

        if "-" in part:
            # Handle range like "5-10"
            try:
                start, end = part.split("-", 1)
                start_num = int(start.strip())
                end_num = int(end.strip())
                if start_num <= 0 or end_num <= 0:
                    raise ValueError("Line numbers must be positive")
                if start_num > end_num:
                    raise ValueError(f"Invalid range: {part} (start > end)")
                line_numbers.update(range(start_num, end_num + 1))
            except ValueError as e:
                if "invalid literal" in str(e):
                    raise ValueError(f"Invalid range format: {part}")
                raise
        else:
            # Handle single number like "1"
            try:
                line_num = int(part)
                if line_num <= 0:
                    raise ValueError("Line numbers must be positive")
                line_numbers.add(line_num)
            except ValueError:
                raise ValueError(f"Invalid line number: {part}")

    return line_numbers


def load_ndjson(
    file_path: Path, save_irs: bool = False, line_filter: set[int] = None
) -> List[Any]:
    """
    Load an NDJSON file and return a list of JSON objects.

    Args:
        file_path: Path to the NDJSON file
        save_irs: Whether to keep file_content and python_source for compilation events
        line_filter: Set of line numbers to include (1-based indexing); None means include all

    Returns:
        List of parsed JSON objects

    Raises:
        FileNotFoundError: If the input file doesn't exist
        json.JSONDecodeError: If a line contains invalid JSON
    """
    json_objects = []
    filtered_compilation_events = 0
    total_lines_processed = 0

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            # enumerate(f, 1) starts line numbering from 1 (1-based indexing)
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:  # Skip empty lines
                    continue

                # Skip line if line filtering is enabled and this line is not in the filter
                # line_num is 1-based (first line = 1, second line = 2, etc.)
                if line_filter is not None and line_num not in line_filter:
                    continue

                total_lines_processed += 1

                try:
                    json_obj = json.loads(line)

                    # Filter out file_content and python_source for compilation events if save_irs is False
                    if not save_irs and isinstance(json_obj, dict):
                        event_type = json_obj.get("event_type")
                        if event_type == "compilation":
                            # Remove file_content and python_source from payload if they exist
                            payload = json_obj.get("payload")
                            if isinstance(payload, dict):
                                fields_to_remove = []
                                if "file_content" in payload:
                                    fields_to_remove.append("file_content")
                                if "python_source" in payload:
                                    fields_to_remove.append("python_source")

                                if fields_to_remove:
                                    # Create copies to avoid modifying the originals
                                    payload = payload.copy()
                                    for field in fields_to_remove:
                                        del payload[field]
                                    json_obj = json_obj.copy()
                                    json_obj["payload"] = payload
                                    filtered_compilation_events += 1

                    json_objects.append(json_obj)
                except json.JSONDecodeError as e:
                    print(
                        f"Error parsing JSON on line {line_num}: {e}", file=sys.stderr
                    )
                    print(f"Problematic line: {line[:100]}...", file=sys.stderr)
                    raise

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
        raise
    except Exception as e:
        print(f"Error reading file '{file_path}': {e}", file=sys.stderr)
        raise

    # Print informational messages
    if line_filter is not None:
        if line_filter:
            print(
                f"Line filtering: processed {total_lines_processed} out of {len(line_filter)} specified lines"
            )
        else:
            print("Line filtering: no valid lines specified")

    # Print warning if compilation events were filtered
    if not save_irs and filtered_compilation_events > 0:
        print(
            f"WARNING: Removed 'file_content' and 'python_source' fields from {filtered_compilation_events} compilation events to reduce file size.",
            file=sys.stderr,
        )
        print(
            "  Use --save-irs flag to preserve these fields if needed.",
            file=sys.stderr,
        )

    return json_objects


def save_prettified_json(json_objects: List[Any], output_path: Path) -> None:
    """
    Save a list of JSON objects to a prettified JSON file.

    Args:
        json_objects: List of JSON objects to save
        output_path: Path where the prettified JSON file is written
    """
    try:
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(json_objects, f, indent=2, ensure_ascii=False, sort_keys=True)
        print(f"Successfully converted to prettified JSON: {output_path}")
    except Exception as e:
        print(f"Error writing to file '{output_path}': {e}", file=sys.stderr)
        raise


def main():
    """Main function to handle command line arguments and orchestrate the conversion."""
    parser = argparse.ArgumentParser(
        description="Convert NDJSON file to prettified JSON file",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python prettify_ndjson.py data.ndjson
  python prettify_ndjson.py /path/to/logs.ndjson
        """,
    )

    parser.add_argument(
        "ndjson_file", type=str, help="Path to the NDJSON file to convert"
    )

    parser.add_argument(
        "--save-irs",
        action="store_true",
        default=False,
        help="Save file_content and python_source for compilation events (default: False, removes these fields to reduce size)",
    )

    parser.add_argument(
        "--lines",
        type=str,
        help="Specify line numbers to include using 1-based indexing (e.g., '1,2,3,5-10'). "
        "Line 1 is the first line of the file. Only these lines from the original NDJSON will be processed. "
        "Supports individual lines (1,2,3) and ranges (5-10).",
    )

    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="Specify output file path (default: {input_stem}_prettified.json in the same directory as input)",
    )

    args = parser.parse_args()

    # Convert to Path object and validate
    input_path = Path(args.ndjson_file)

    if not input_path.exists():
        print(f"Error: File '{input_path}' does not exist.", file=sys.stderr)
        sys.exit(1)

    if not input_path.is_file():
        print(f"Error: '{input_path}' is not a file.", file=sys.stderr)
        sys.exit(1)

    # Generate output filename
    if args.output:
        output_path = Path(args.output)
    else:
        # Default: {input_stem}_prettified.json in same directory as input
        output_path = input_path.parent / f"{input_path.stem}_prettified.json"

    try:
        # Parse line filter if provided
        line_filter = None
        if args.lines:
            try:
                line_filter = parse_line_ranges(args.lines)
                print(
                    f"Line filtering enabled: will process {len(line_filter)} specified lines"
                )
            except ValueError as e:
                print(f"Error parsing --lines argument: {e}", file=sys.stderr)
                sys.exit(1)

        # Load NDJSON file
        print(f"Loading NDJSON file: {input_path}")
        if not args.save_irs:
            print(
                "Filtering out file_content and python_source from compilation events to reduce size"
            )
        json_objects = load_ndjson(
            input_path, save_irs=args.save_irs, line_filter=line_filter
        )
        print(f"Loaded {len(json_objects)} JSON objects")

        # Save as prettified JSON
        print(f"Saving prettified JSON to: {output_path}")
        save_prettified_json(json_objects, output_path)

        print("Conversion completed successfully!")

    except Exception as e:
        print(f"Conversion failed: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
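A quick sketch exercising the helpers above on throwaway data; the file names are arbitrary and the import path assumes the installed package layout.

from pathlib import Path

from tritonparse.tools.prettify_ndjson import (
    load_ndjson,
    parse_line_ranges,
    save_prettified_json,
)

# Ranges expand inclusively; indexing is 1-based.
assert parse_line_ranges("1,3,5-7") == {1, 3, 5, 6, 7}

demo = Path("demo.ndjson")
demo.write_text('{"name": "Alice"}\n{"name": "Bob"}\n{"name": "Charlie"}\n')

objs = load_ndjson(demo, line_filter={1, 3})  # keep only lines 1 and 3
save_prettified_json(objs, Path("demo_prettified.json"))  # indent=2, sorted keys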
tritonparse/tp_logger.py
ADDED