tritonparse 0.2.4.dev20251013071533__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tritonparse might be problematic. Click here for more details.

Files changed (121) hide show
  1. tritonparse-0.3.0/CHANGELOG.md +308 -0
  2. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/PKG-INFO +1 -1
  3. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/cli.py +1 -0
  4. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/reproducer/cli.py +15 -0
  5. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/reproducer/orchestrator.py +17 -1
  6. tritonparse-0.3.0/tritonparse/reproducer/placeholder_replacer.py +227 -0
  7. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/reproducer/templates/example.py +3 -0
  8. tritonparse-0.3.0/tritonparse/reproducer/types.py +18 -0
  9. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse.egg-info/PKG-INFO +1 -1
  10. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse.egg-info/SOURCES.txt +1 -5
  11. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/package-lock.json +1451 -1608
  12. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/package.json +14 -10
  13. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/Callstack.tsx +1 -1
  14. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/CodeComparisonView.tsx +1 -1
  15. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/CompilationInfo.tsx +1 -1
  16. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/SingleCodeViewer.tsx +1 -1
  17. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/TritonIRs.tsx +1 -1
  18. tritonparse-0.3.0/website/src/index.css +28 -0
  19. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/pages/KernelOverview.tsx +2 -2
  20. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/utils/dataLoader.ts +1 -1
  21. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/vite.config.ts +2 -0
  22. tritonparse-0.2.4.dev20251013071533/CHANGELOG.md +0 -129
  23. tritonparse-0.2.4.dev20251013071533/docs/README.md +0 -13
  24. tritonparse-0.2.4.dev20251013071533/docs/screenshots/code-comparison.png +0 -0
  25. tritonparse-0.2.4.dev20251013071533/docs/screenshots/kernel-overview.png +0 -0
  26. tritonparse-0.2.4.dev20251013071533/tritonparse/reproducer/placeholder_replacer.py +0 -115
  27. tritonparse-0.2.4.dev20251013071533/website/postcss.config.js +0 -6
  28. tritonparse-0.2.4.dev20251013071533/website/src/index.css +0 -74
  29. tritonparse-0.2.4.dev20251013071533/website/tailwind.config.js +0 -8
  30. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.ci/README.md +0 -0
  31. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.ci/install-project.sh +0 -0
  32. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.ci/install-triton.sh +0 -0
  33. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.ci/run-tests.sh +0 -0
  34. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.ci/setup.sh +0 -0
  35. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.github/PAGES_SETUP.md +0 -0
  36. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.github/workflows/deploy-pages-standalone.yml +0 -0
  37. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.github/workflows/deploy-pages.yml +0 -0
  38. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.github/workflows/nightly-pypi.yml +0 -0
  39. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.github/workflows/test.yml +0 -0
  40. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/.gitignore +0 -0
  41. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/CODE_OF_CONDUCT.md +0 -0
  42. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/CONTRIBUTING.md +0 -0
  43. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/LICENSE +0 -0
  44. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/Makefile +0 -0
  45. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/README.md +0 -0
  46. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/__init__.py +0 -0
  47. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/pyproject.toml +0 -0
  48. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/run.py +0 -0
  49. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/setup.cfg +0 -0
  50. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/README.md +0 -0
  51. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/__init__.py +0 -0
  52. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/example_output/logs/dedicated_log_triton_trace_findhao_.ndjson +0 -0
  53. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/example_output/parsed_output/dedicated_log_triton_trace_findhao__mapped.ndjson.gz +0 -0
  54. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/example_output/parsed_output/f0_fc0_a0_cai-.ndjson.gz +0 -0
  55. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/example_output/parsed_output/log_file_list.json +0 -0
  56. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/example_output/parsed_output_complex/dedicated_log_triton_trace_findhao__mapped.ndjson.gz +0 -0
  57. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/example_output/parsed_output_complex/log_file_list.json +0 -0
  58. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/test_add.py +0 -0
  59. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tests/test_tritonparse.py +0 -0
  60. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/__init__.py +0 -0
  61. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/__main__.py +0 -0
  62. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/common.py +0 -0
  63. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/context_manager.py +0 -0
  64. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/event_diff.py +0 -0
  65. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/extract_source_mappings.py +0 -0
  66. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/ir_parser.py +0 -0
  67. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/mapper.py +0 -0
  68. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/reproducer/__init__.py +0 -0
  69. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/reproducer/ingestion/ndjson.py +0 -0
  70. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/reproducer/templates/__init__.py +0 -0
  71. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/reproducer/templates/loader.py +0 -0
  72. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/reproducer/utils.py +0 -0
  73. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/shared_vars.py +0 -0
  74. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/source_type.py +0 -0
  75. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/sourcemap_utils.py +0 -0
  76. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/structured_logging.py +0 -0
  77. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/tools/__init__.py +0 -0
  78. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/tools/decompress_bin_ndjson.py +0 -0
  79. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/tools/disasm.py +0 -0
  80. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/tools/format_fix.py +0 -0
  81. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/tools/load_tensor.py +0 -0
  82. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/tools/prettify_ndjson.py +0 -0
  83. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/tools/readme.md +0 -0
  84. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/tp_logger.py +0 -0
  85. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/trace_processor.py +0 -0
  86. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse/utils.py +0 -0
  87. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse.egg-info/dependency_links.txt +0 -0
  88. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse.egg-info/entry_points.txt +0 -0
  89. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse.egg-info/requires.txt +0 -0
  90. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/tritonparse.egg-info/top_level.txt +0 -0
  91. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/eslint.config.js +0 -0
  92. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/index.html +0 -0
  93. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/public/dedicated_log_triton_trace_findhao__mapped.ndjson.gz +0 -0
  94. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/public/f0_fc0_a0_cai-.ndjson +0 -0
  95. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/public/favicon.ico +0 -0
  96. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/public/logo.svg +0 -0
  97. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/scripts/inline-html.js +0 -0
  98. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/App.css +0 -0
  99. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/App.tsx +0 -0
  100. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/assets/react.svg +0 -0
  101. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/ArgumentViewer.tsx +0 -0
  102. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/CodeViewer.tsx +0 -0
  103. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/CopyCodeButton.tsx +0 -0
  104. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/DataSourceSelector.tsx +0 -0
  105. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/DiffComparisonView.tsx +0 -0
  106. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/DiffViewer.tsx +0 -0
  107. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/ExternalLink.tsx +0 -0
  108. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/StackDiffViewer.tsx +0 -0
  109. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/ToggleSwitch.tsx +0 -0
  110. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/components/WelcomeScreen.tsx +0 -0
  111. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/context/FileDiffSession.tsx +0 -0
  112. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/main.tsx +0 -0
  113. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/pages/CodeView.tsx +0 -0
  114. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/pages/FileDiffView.tsx +0 -0
  115. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/utils/fbDetection.ts +0 -0
  116. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/utils/safeImport.ts +0 -0
  117. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/utils/tensor.ts +0 -0
  118. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/src/vite-env.d.ts +0 -0
  119. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/tsconfig.app.json +0 -0
  120. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/tsconfig.json +0 -0
  121. {tritonparse-0.2.4.dev20251013071533 → tritonparse-0.3.0}/website/tsconfig.node.json +0 -0
@@ -0,0 +1,308 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.3.0] - 2025-10-14
9
+
10
+ ### TritonParse Release Notes (last 44 commits)
11
+
12
+ - **Date range**: 2025-09-19 — 2025-10-14
13
+ - **Scope**: Major feature release - Reproducer system, tensor storage, SASS support, enhanced context manager, CLI improvements.
14
+
15
+ ### Highlights
16
+
17
+ - **🔄 Reproducer System (Complete)**: Full-featured standalone kernel script generation with template support, tensor reconstruction, and multiple import modes. Extract any traced kernel into a self-contained Python script for debugging, testing, and sharing.
18
+ - **💾 TensorBlobManager**: Production-ready content-addressed tensor storage with automatic compression, deduplication, quota management, and efficient disk usage. Enables high-fidelity kernel reproduction with actual tensor data.
19
+ - **🔧 SASS Disassembly Support**: Optional NVIDIA SASS disassembly during compilation tracing for low-level debugging and performance analysis. Toggle via `enable_sass_dump` parameter or `TRITONPARSE_DUMP_SASS` environment variable.
20
+ - **🎯 Enhanced Context Manager**: Configurable `TritonParseManager` context manager with support for trace launch control, inductor compilation splitting, and flexible parsing parameters.
21
+ - **⚡ CLI Modernization**: Refactored to subcommand structure (`tritonparse parse`, `tritonparse reproduce`) with unified entry point and improved argument handling.
22
+ - **📊 Auto-enable Inductor Launch Tracing**: Automatic detection and tracing of PyTorch Inductor-compiled kernels without manual configuration.
23
+ - **🌐 Website Improvements**: Light mode color scheme, improved stack display in Launch Analysis, and better file diff navigation.
24
+
25
+ ### Changes by area
26
+
27
+ #### 🔄 **Reproducer System**
28
+ - **Complete reproducer infrastructure** (PR #117-127):
29
+ - CLI subcommand structure: `tritonparse reproduce <ndjson_file> [options]`
30
+ - NDJSON ingestion layer with IR preservation
31
+ - Context bundle system for kernel metadata and parameters
32
+ - Standardized output paths: `repro_output/<kernel_name>/repro_<timestamp>.py`
33
+ - Template support with placeholder system for custom generation
34
+ - Example templates for tensor loading and kernel invocation
35
+ - Dynamic import generation for kernel dependencies
36
+ - Kernel signature parsing and integration
37
+ - Kernel invocation snippet generation with grid/block configuration
38
+ - **Kernel import modes** (PR #165, #166):
39
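+ - `--kernel-import default`: Import kernel from the original source file (default); `--kernel-import copy`: Embed the kernel source code directly in the reproducer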
40
+ - `--kernel-import override-ttir`: Override and inject TTIR for advanced debugging
41
+ - Flexible kernel loading strategies for different debugging workflows
42
+ - **Enhanced tensor handling** (PR #141):
43
+ - Improved tensor metadata logging (shape, dtype, stride, storage offset, device)
44
+ - Better tensor reconstruction quality in generated reproducers
45
+ - Support for non-contiguous tensors (commit 12f1d1b)
46
+ - **Extensible placeholder system** (PR #149):
47
+ - Refactored placeholder replacement with class-based design
48
+ - Support for: `{{KERNEL_IMPORT_PLACEHOLDER}}`, `{{KERNEL_INVOCATION_PLACEHOLDER}}`, `{{KERNEL_SYSPATH_PLACEHOLDER}}`, `{{JSON_FILE_NAME_PLACEHOLDER}}`
49
+ - Easy extension for future template needs
50
+ - **Documentation**: Comprehensive reproducer section in README (PR #161) and Usage Guide in Wiki
51
+
52
+ #### 💾 **TensorBlobManager & Storage**
53
+ - **Production-ready blob storage** (PR #156):
54
+ - Content-addressed storage using BLAKE2b hashing
55
+ - Automatic gzip compression for large tensors (>1MB)
56
+ - Two-level directory structure (`xx/hash.bin.gz`) to avoid filesystem limits
57
+ - Automatic deduplication: identical tensors stored only once
58
+ - Storage quota enforcement (default: 100GB)
59
+ - Per-tensor size limit (default: 10GB) to prevent OOM
60
+ - Real-time statistics: saved count, dedup hits, compression ratio
61
+ - Graceful degradation with warning logs when quota exceeded
62
+ - **Compression support** (PR #157):
63
+ - Configurable compression level (default: 4)
64
+ - Atomic writes using temporary files + rename for safety
65
+ - Hash verification for data integrity
66
+ - **Comprehensive testing** (PR #162):
67
+ - Unit tests for compression, deduplication, quota management
68
+ - Edge case handling and cleanup verification
69
+
70
+ #### 🔧 **SASS Disassembly**
71
+ - **SASS extraction support** (PR #137):
72
+ - New tool: `tritonparse/tools/disasm.py` for CUBIN disassembly
73
+ - Integration into structured logging behind opt-in flag
74
+ - Uses `nvdisasm -c -gp -g -gi` for detailed disassembly
75
+ - Parses output to find function blocks with preserved labels and source mapping
76
+ - **Configuration**:
77
+ - Environment variable: `TRITONPARSE_DUMP_SASS=1`
78
+ - API parameter: `enable_sass_dump=True` in `structured_logging.init()`
79
+ - API parameter takes precedence over environment variable
80
+ - **Robustness**:
81
+ - Error handling for subprocess failures, missing nvdisasm, and generic exceptions
82
+ - Writes marker messages instead of failing the trace
83
+ - Requires NVIDIA CUDA Binary Utilities (nvdisasm)
84
+ - **CUDA testing** (PR #138):
85
+ - Strengthened tests to validate SASS extraction and persistence
86
+
87
+ #### 🎯 **Context Manager & API**
88
+ - **Enhanced context manager** (PR #144, #159):
89
+ - Added `__init__` method with configurable parameters:
90
+ - `enable_trace_launch`: Control trace launch logging
91
+ - `split_inductor_compilations`: Control inductor compilation splitting
92
+ - `**parse_kwargs`: Additional arguments for `unified_parse`
93
+ - Updated `__exit__` to pass parameters through to parsing pipeline
94
+ - More flexible for different use cases and workflows
95
+ - **Split inductor compilations control**:
96
+ - Parameter threading through: `unified_parse()` → `oss_run()` → `parse_logs()` → `parse_single_file()`
97
+ - Renamed from `split_by_frame_id_and_compile_id` to `split_inductor_compilations` for clarity
98
+ - Default `True`: splits by frame_id, frame_compile_id, attempt_id, compiled_autograd_id
99
+ - When `False`: groups all inductor compilations together
100
+ - Follows tlparse's convention
101
+ - **Unit tests** (commit a5338ce):
102
+ - Tests for enhanced context manager behavior
103
+ - Validation of split inductor compilation modes
104
+
105
+ #### ⚡ **CLI & Entry Points**
106
+ - **Subcommand structure** (PR #117):
107
+ - Refactored from single-command to modern subcommand architecture
108
+ - `tritonparse parse <source> [options]` - Run structured log parser
109
+ - `tritonparse reproduce <ndjson_file> [options]` - Generate reproducers
110
+ - Breaking change: old `python run.py <source>` no longer works
111
+ - Extract parser flags into `tritonparse.utils._add_parse_args()`
112
+ - Remove `unified_parse_from_cli` (programmatic `unified_parse()` remains)
113
+ - **Unified entry point** (PR #133):
114
+ - Added proper CLI entry point in package configuration
115
+ - Unified argument handling across commands
116
+ - **CLI entry point fix** (PR #154):
117
+ - Fixed `ModuleNotFoundError` for tritonparse CLI entry point
118
+ - Improved package installation and command availability
119
+
120
+ #### 📊 **Logging & Tracing**
121
+ - **Auto-enable Inductor Launch Tracing** (PR #142):
122
+ - Automatically detect and trace PyTorch Inductor-compiled kernels
123
+ - No manual configuration required for Inductor workflows
124
+ - Seamless integration with existing tracing infrastructure
125
+ - **Kernel source path output** (commit 03bc1e1):
126
+ - Output `kernel_src_path` in trace metadata for better debugging
127
+ - **NDJSON prettifier improvements** (PR #135):
128
+ - Renamed and inverted the flag so that IRs are filtered out by default
129
+ - More intuitive filtering behavior
130
+ - **Debug flag deprecation** (PR #132):
131
+ - Removed unused debugging flags
132
+ - Cleaner configuration surface
133
+
134
+ #### 🌐 **Website & UI**
135
+ - **Upgraded to Tailwind CSS v4** (commit 6c42d8a):
136
+ - Migrated from PostCSS plugin to `@tailwindcss/vite` for improved performance
137
+ - Updated CSS import syntax from `@tailwind` directives to `@import "tailwindcss"`
138
+ - Removed `tailwind.config.js` and `postcss.config.js` (now CSS-based configuration)
139
+ - Updated `shadow` class naming to v4 convention (`shadow` → `shadow-sm`)
140
+ - Cleaned up global CSS to prevent interference with Tailwind utility classes
141
+ - **Upgraded all frontend dependencies**:
142
+ - Vite: 6.3.5 → 7.1.10
143
+ - React ecosystem: Updated to latest versions (React 19+)
144
+ - TypeScript: 5.7.2 → 5.7.3
145
+ - Added `@types/node` for Node.js type definitions
146
+ - Fixed dompurify security vulnerability (3.1.7 → 3.3.0) via npm overrides
147
+ - **Light mode color scheme** (PR #139):
148
+ - Updated `index.css` to support only light mode
149
+ - Consistent, professional appearance
150
+ - **Improved stack display** (PR #151):
151
+ - Better stack trace visualization in Launch Analysis
152
+ - Clearer debugging information
153
+ - **Documentation cleanup** (PR #172):
154
+ - Removed redundant docs directory and screenshots
155
+ - Streamlined repository structure
156
+
157
+ #### 🔧 **Bug Fixes & Maintenance**
158
+ - **General bug fixes** (PR #153):
159
+ - Multiple stability and reliability improvements
160
+ - Better error handling throughout codebase
161
+ - **Deserialization fix** (commit d4d7a20):
162
+ - Fixed unhandled types in deserialization
163
+ - More robust data loading
164
+ - **README improvements** (PR #158, #164):
165
+ - Refactored and cleaned up README
166
+ - Fixed command typos in reproducer generation examples
167
+ - Clearer installation and usage instructions
168
+ - **Test cleanup** (PR #160):
169
+ - Removed deprecated test for triton_kernels Tensor functionality
170
+ - Updated test suite for current codebase
171
+
172
+ ### Compatibility notes
173
+
174
+ - **Breaking Change**: CLI now uses subcommand structure. Old usage `python run.py <source>` must be updated to `tritonparse parse <source>` or `python run.py parse <source>`.
175
+ - **New Dependencies**: SASS disassembly requires NVIDIA CUDA Binary Utilities (`nvdisasm`). This is optional and only needed if `enable_sass_dump=True`.
176
+ - **Storage**: TensorBlobManager introduces new blob storage directory structure. Default quota is 100GB; configure via `TensorBlobManager` initialization if needed.
177
+ - **Context Manager API**: Enhanced with new parameters. Fully backward compatible with sensible defaults.
178
+
179
+ ### Upgrade guidance
180
+
181
+ 1. **Update CLI commands**: Change `python run.py <source>` to `python run.py parse <source>`, or use the new `tritonparse parse <source>` command if the package is installed via pip.
182
+ 2. **Reproducer usage**: Use `tritonparse reproduce ./parsed_output/trace.ndjson.gz --line <N> --out-dir <output>` to generate standalone kernel scripts.
183
+ 3. **SASS disassembly**: Opt-in by setting `TRITONPARSE_DUMP_SASS=1` or passing `enable_sass_dump=True` to `structured_logging.init()`. Requires `nvdisasm` in PATH.
184
+ 4. **Tensor storage**: Enable high-fidelity reproduction by using TensorBlobManager (enabled by default when `enable_trace_launch=True`).
185
+ 5. **Context manager**: Use enhanced `TritonParseManager` for more control over tracing and parsing behavior.
186
+
187
+ ## [0.2.3] - 2025-09-19
188
+
189
+ ### TritonParse Release Notes (last 15 commits)
190
+
191
+ - **Date range**: 2025-09-13 — 2025-09-18
192
+ - **Scope**: Website UI/UX, core library, CI/CD & packaging, documentation & testing.
193
+
194
+ ### Highlights
195
+ - **Website File Diff tooling**: Introduced a new Diff Comparison view and File Diff page, preserved diff sessions across navigation, integrated Monaco editor, added preview mode, and shipped a round of UI polish with a URL redirect fix for File Diff navigation.
196
+ - **Kernel Overview**: Added a tiled kernel view toggle to improve dense overviews.
197
+ - **Core**: Added lazy-import support for Triton repo `triton_kernels` custom types, attribution check for `torch._utils_internal`, and safer file mapping cleanup in the log parser.
198
+ - **CI/Packaging**: Refactored dependencies in `pyproject.toml`, removed a legacy Triton install script, and updated GitHub Actions workflows.
199
+ - **Docs & tests**: Improved README guidance; added tests and example outputs; minor UI bug fix in `CopyCodeButton` SVG attributes.
200
+
201
+ ### Changes by area
202
+ - **Website UI/UX**
203
+ - Introduce `DiffComparisonView` and `FileDiffView`; maintain diff session state; integrate Monaco editor; preview mode; UI polish and navigation fixes.
204
+ - Add tiled kernel view toggle in `KernelOverview`.
205
+
206
+ - **Core library**
207
+ - Lazy-import support for `triton_kernels` custom types; extend tensor handling in tests.
208
+ - Add attribution check for `torch._utils_internal`.
209
+ - Refactor file mapping cleanup in `parse_logs`.
210
+
211
+ - **CI/CD & packaging**
212
+ - Refactor dependencies in `pyproject.toml`; remove `.ci/install-triton-pip.sh`.
213
+ - Update GitHub Actions workflows; add helper for `triton_kernels` in CI.
214
+
215
+ - **Docs & testing**
216
+ - Clarify tool purpose and installation in `README.md`.
217
+ - Add tests and sample outputs; small UI component fixes.
218
+
219
+ ### Compatibility notes
220
+ - No breaking changes expected. `triton_kernels` support is optional via lazy import.
221
+
222
+ ### Upgrade guidance
223
+ - Reinstall website dependencies if developing the UI to pick up the Monaco editor.
224
+
225
+ ## [0.2.0] - 2025-09-11
226
+
227
+ ### TritonParse Release Notes (last 27 commits)
228
+
229
+ - **Date range**: 2025-07-25 — 2025-09-11
230
+ - **Scope**: Core library, website UI/UX, performance & scalability, CI/CD & packaging, documentation & maintenance.
231
+
232
+ ### Highlights
233
+ - **Website usability**: Drag-and-drop to open logs; one-click copy in code viewers; sticky, compact kernel selector; footer shows app version, localized build date, and Git short SHA; tensor arguments in Launch Analysis now display concise summaries with expandable details.
234
+ - **Large-file parsing**: Streaming NDJSON parsing and robust gzip handling significantly reduce memory usage and improve stability for files >100 MB.
235
+ - **Core & integrations**: Persist Inductor kernel config into `inductor_metadata` and pass to JIT hooks; ensure Inductor path invokes `jit_post_compile_hook`; new `init_with_env` for environment-based initialization; move compilation timing `times` into `metadata` for automatic frontend rendering.
236
+ - **Releases & versioning**: Adopt setuptools-scm dynamic versioning; add Nightly PyPI publishing; enable stable publishing on tag push; fix nightly version potentially being older than stable; correct packaging license metadata.
237
+ - **CI stability**: Ubuntu 24.04 compatibility; improved CUDA/cuDNN setup and detection; parallelize jobs; add parallel CI for pip-installed Triton; better error visibility in install scripts; upgrade libstdc++.
238
+
239
+ ### Changes by area
240
+ - **Core library**
241
+ - Save Inductor kernel params to `inductor_metadata` and forward to JIT hooks.
242
+ - Manually invoke `jit_post_compile_hook` in the Inductor Triton compile path.
243
+ - Add `init_with_env` that reads `TRITON_TRACE_FOLDER` and `TRITON_TRACE_LAUNCH`.
244
+ - Move compilation `times` into `metadata` so the frontend auto-renders it.
245
+ - Use cached source in compile listener for stability.
246
+ - Refactor source-mapping pipeline into modular units for maintainability.
247
+
248
+ - **Website UI/UX**
249
+ - Drag-and-drop to open supported log files.
250
+ - Copy button in code viewer panels.
251
+ - Sticky/collapsible/compact kernel selector in Kernel Overview; resizable compilation stack trace vertically.
252
+ - Launch Analysis: tensor args show concise summaries with expandable details.
253
+ - Footer displays version, localized build date, and Git short SHA.
254
+ - Streaming NDJSON parsing and improved error handling for large logs.
255
+
256
+ - **Performance & scalability**
257
+ - Use streaming path for files >100 MB to reduce memory peaks and improve robustness.
258
+
259
+ - **CI/CD & packaging**
260
+ - Enable setuptools-scm and nightly PyPI publishing.
261
+ - Publish stable releases on tag push; improve version computation and tag detection.
262
+ - Fix nightly version possibly lagging behind stable; add clear error on missing tags.
263
+ - Add parallel CI for pip-installed Triton; recommend pip installation in docs.
264
+ - Improve Ubuntu 24.04 setup, CUDA/cuDNN handling, and job parallelism.
265
+ - Increase error visibility in install scripts and upgrade libstdc++.
266
+ - Define lower bounds for prerequisites in `pyproject.toml`.
267
+
268
+ - **Docs & maintenance**
269
+ - Move repository to `meta-pytorch` org; update links and guidance; add AI assistant context.
270
+ - Update/restore CONTRIBUTING docs to avoid breaking downstream consumers.
271
+
272
+ - **Testing**
273
+ - Preserve test outputs when `TEST_KEEP_OUTPUT=1` to aid debugging.
274
+
275
+ ### Compatibility notes
276
+ - Versioning & publishing: setuptools-scm with tag-based stable releases and nightly dev versions. Ensure `PYPI_API_TOKEN` is configured in CI if publishing is intended.
277
+ - Data format: compilation timing `times` moved under `metadata`; update any downstream scripts that referenced the old location.
278
+ - Build metadata: footer shows localized build date and Git short SHA; restart dev server to refresh these values.
279
+
280
+ ### Upgrade guidance
281
+ - Prefer Triton from PyPI (≥ 3.4.0) and adhere to the lower bounds declared in `pyproject.toml`.
282
+ - For deterministic build metadata in the website, set `BUILD_DATE` and `GIT_COMMIT_SHA_SHORT` in the environment when running dev/build.
283
+
284
+
285
+ ## [0.1.1] - 2025-07-25
286
+
287
+ ### Added
288
+
289
+ - **Launch Difference Analysis**: A new `launch_diff` event is automatically generated for each kernel, providing a concise summary of how launch parameters vary across different calls. This helps to quickly identify changes in kernel arguments, grid dimensions, and other metadata.
290
+ - **Enhanced Web UI for Launch Analysis**: The web interface now visualizes the `launch_diff` data, offering an interactive way to explore how kernel launches differ. It includes a detailed breakdown of constant vs. varying parameters and their value distributions.
291
+ - **Kernel-Centric Event Grouping**: The parser now intelligently groups compilation and launch events by kernel, making it easier to analyze the entire lifecycle of a specific kernel.
292
+ - **Launch Event Tracing Control**: Added an `enable_trace_launch` parameter to `tritonparse.structured_logging.init` to give users explicit control over whether to trace kernel launch events.
293
+ - **Enhanced Logging and Testing**: Improved the structured logging initialization and expanded test coverage to verify the correctness of `launch` and `compilation` event counts.
294
+
295
+ ## [0.1.0] - 2025-07-21
296
+
297
+ This is the initial public release of TritonParse.
298
+
299
+ ### Added
300
+
301
+ - **Interactive Web Interface**: A rich, client-side web UI for exploring, comparing, and understanding Triton IRs. Features side-by-side code views, synchronized highlighting, and detailed metadata panels.
302
+ - **Structured Logging Backend**: A powerful Python backend to capture detailed information from the Triton compiler and runtime, including IRs (TTIR, TTGIR, PTX, AMDGCN), metadata, timings, and Python source code, and outputs it as structured NDJSON logs.
303
+ - **Source-to-Source Mapping**: Automatic generation of bidirectional mappings between Python code and all intermediate representations (IRs), allowing you to trace a line of Python code all the way down to the generated assembly and back.
304
+ - **Kernel Launch Tracing**: Capability to trace each kernel launch, capturing the grid dimensions, kernel arguments (with detailed tensor information), and other runtime metadata.
305
+ - **Flexible Log Parsing CLI**: A command-line interface (`run.py`) to parse logs from local files or directories, and from single or multiple ranks in a distributed training job.
306
+ - **Prerequisites Documentation**: Clear requirements for Python (>=3.10), PyTorch, and Triton (>3.3.1, compiled from source).
307
+ - **Getting Started Guide**: A step-by-step workflow for generating, parsing, and visualizing traces.
308
+ - **Configuration via Environment Variables**: Support for `TRITON_TRACE`, `TRITON_TRACE_LAUNCH`, `TRITONPARSE_KERNEL_ALLOWLIST`, and `TRITON_TRACE_GZIP`.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tritonparse
3
- Version: 0.2.4.dev20251013071533
3
+ Version: 0.3.0
4
4
  Summary: TritonParse: A Compiler Tracer, Visualizer, and mini-Reproducer Generator for Triton Kernels
5
5
  Author-email: Yueming Hao <yhao@meta.com>
6
6
  License-Expression: BSD-3-Clause
@@ -73,6 +73,7 @@ def main():
73
73
  line_index=args.line,
74
74
  out_dir=args.out_dir,
75
75
  template=args.template,
76
+ kernel_import=args.kernel_import,
76
77
  )
77
78
  else:
78
79
  raise RuntimeError(f"Unknown command: {args.func}")
@@ -1,5 +1,7 @@
1
1
  import argparse
2
2
 
3
+ from tritonparse.reproducer.types import KernelImportMode
4
+
3
5
 
4
6
  def _add_reproducer_args(parser: argparse.ArgumentParser) -> None:
5
7
  """Add common arguments for the reproducer to a parser."""
@@ -29,3 +31,16 @@ def _add_reproducer_args(parser: argparse.ArgumentParser) -> None:
29
31
  "Defaults to 'example'."
30
32
  ),
31
33
  )
34
+ parser.add_argument(
35
+ "--kernel-import",
36
+ type=KernelImportMode,
37
+ choices=list(KernelImportMode),
38
+ default=KernelImportMode.DEFAULT,
39
+ help=(
40
+ "Kernel import strategy:\n"
41
+ " default: Import kernel from original file (current behavior)\n"
42
+ " copy: Embed kernel source code directly in reproducer\n"
43
+ " override-ttir: Use TTIR from compilation event (bypass Python frontend)\n"
44
+ "Defaults to 'default'."
45
+ ),
46
+ )
@@ -7,6 +7,7 @@ from tritonparse.reproducer.placeholder_replacer import (
7
7
  PlaceholderReplacer,
8
8
  )
9
9
  from tritonparse.reproducer.templates.loader import load_template_code
10
+ from tritonparse.reproducer.types import KernelImportMode
10
11
  from tritonparse.reproducer.utils import determine_output_paths
11
12
 
12
13
  from tritonparse.tools.prettify_ndjson import load_ndjson, save_prettified_json
@@ -19,6 +20,7 @@ def reproduce(
19
20
  out_dir: str,
20
21
  template: str,
21
22
  replacer: Optional[PlaceholderReplacer] = None,
23
+ kernel_import: KernelImportMode = KernelImportMode.DEFAULT,
22
24
  ) -> dict[str, Path]:
23
25
  """
24
26
  Generate a reproducer script from NDJSON trace file.
@@ -29,6 +31,7 @@ def reproduce(
29
31
  out_dir: Output directory for reproducer files.
30
32
  template: Template name to use for the reproducer.
31
33
  replacer: Optional custom PlaceholderReplacer instance. If None, uses DefaultPlaceholderReplacer.
34
+ kernel_import: Kernel import mode (DEFAULT, COPY, or OVERRIDE_TTIR).
32
35
  """
33
36
  logger.debug(f"Building bundle from {input_path} at line {line_index}")
34
37
  events = load_ndjson(Path(input_path))
@@ -43,6 +46,15 @@ def reproduce(
43
46
  out_dir, context_bundle.kernel_info.function_name
44
47
  )
45
48
  save_prettified_json(context_bundle.raw_launch_event, temp_json_path)
49
+
50
+ # Save compilation event JSON if using OVERRIDE_TTIR mode
51
+ comp_json_path = None
52
+ if kernel_import == KernelImportMode.OVERRIDE_TTIR:
53
+ comp_json_path = (
54
+ temp_json_path.parent / f"{temp_json_path.stem}_compilation.json"
55
+ )
56
+ save_prettified_json(context_bundle.raw_comp_event, comp_json_path)
57
+
46
58
  logger.debug("Loading reproducer template.")
47
59
  template_code = load_template_code(template)
48
60
 
@@ -51,7 +63,11 @@ def reproduce(
51
63
  if replacer is None:
52
64
  replacer = DefaultPlaceholderReplacer()
53
65
  final_code = replacer.replace(
54
- template_code, context_bundle, temp_json_path=temp_json_path
66
+ template_code,
67
+ context_bundle,
68
+ temp_json_path=temp_json_path,
69
+ kernel_import=kernel_import,
70
+ comp_json_filename=comp_json_path.name if comp_json_path else None,
55
71
  )
56
72
 
57
73
  out_py_path.write_text(final_code, encoding="utf-8")
@@ -0,0 +1,227 @@
1
+ from abc import ABC
2
+
3
+ from typing import Any, Dict, Protocol
4
+
5
+ from tritonparse.reproducer.ingestion.ndjson import ContextBundle
6
+ from tritonparse.reproducer.types import KernelImportMode
7
+ from tritonparse.reproducer.utils import (
8
+ _generate_import_statements,
9
+ _generate_invocation_snippet,
10
+ _parse_kernel_signature,
11
+ )
12
+
13
+
14
class HandlerProtocol(Protocol):
    """Structural type for placeholder handler callables.

    A handler is called with the current code string and the kernel's
    context bundle, plus arbitrary keyword arguments, and returns a code
    string.
    """

    def __call__(
        self,
        code: str,
        context_bundle: ContextBundle,
        **kwargs: Any,
    ) -> str:
        ...
18
+
19
+
20
class PlaceholderReplacer(ABC):
    """
    Abstract base class for template placeholder replacement.

    Subclasses should register replacement handlers in their __init__ method
    by calling self.register(placeholder, handler_function).

    Each handler function should have the signature:
        handler(code: str, context_bundle: ContextBundle, **kwargs) -> str
    """

    def __init__(self) -> None:
        # Maps each placeholder string to the handler registered for it.
        # Dict insertion order is the order in which replace() runs handlers.
        self.handlers: Dict[str, HandlerProtocol] = {}

    def register(self, placeholder: str, handler: HandlerProtocol) -> None:
        """
        Register a handler function for a specific placeholder.

        Registering a placeholder that is already present replaces its
        handler.

        Args:
            placeholder: The placeholder string to replace (e.g., "{{JSON_FILE_NAME_PLACEHOLDER}}")
            handler: A callable that takes (code, context_bundle, **kwargs) and returns modified code
        """
        self.handlers[placeholder] = handler

    def replace(
        self, template_code: str, context_bundle: ContextBundle, **kwargs: Any
    ) -> str:
        """
        Replace all registered placeholders in the template code.

        Every registered handler is called once, in registration order, and
        receives the output of the previous handler.

        Args:
            template_code: The template code containing placeholders
            context_bundle: Context information about the kernel
            **kwargs: Additional keyword arguments passed to handler functions

        Returns:
            The code with all placeholders replaced
        """
        code = template_code
        # Only the handlers are needed here: each one performs its own
        # substitution, so the placeholder key is not passed along.
        for handler in self.handlers.values():
            code = handler(code, context_bundle, **kwargs)
        return code
63
+
64
+
65
class DefaultPlaceholderReplacer(PlaceholderReplacer):
    """
    Default implementation of PlaceholderReplacer.

    Handles the following placeholders:
    - {{JSON_FILE_NAME_PLACEHOLDER}}: Replaced with the JSON file name
    - # {{IR_OVERRIDE_SETUP_PLACEHOLDER}}: Replaced with TTIR override setup
      code in OVERRIDE_TTIR mode, otherwise removed
    - # {{KERNEL_SYSPATH_PLACEHOLDER}}: Replaced with sys.path setup code
    - # {{KERNEL_IMPORT_PLACEHOLDER}}: Replaced with kernel import statement
    - # {{KERNEL_INVOCATION_PLACEHOLDER}}: Replaced with kernel invocation code

    Handlers read their options from the keyword arguments given to
    replace(): ``temp_json_path``, ``kernel_import`` and
    ``comp_json_filename``.
    """

    def __init__(self) -> None:
        super().__init__()
        # Register all default handlers
        self.register("{{JSON_FILE_NAME_PLACEHOLDER}}", self._replace_json_filename)
        self.register(
            "# {{IR_OVERRIDE_SETUP_PLACEHOLDER}}", self._replace_ir_override_setup
        )
        self.register("# {{KERNEL_SYSPATH_PLACEHOLDER}}", self._replace_kernel_syspath)
        self.register("# {{KERNEL_IMPORT_PLACEHOLDER}}", self._replace_kernel_import)
        self.register(
            "# {{KERNEL_INVOCATION_PLACEHOLDER}}", self._replace_kernel_invocation
        )

    def _replace_json_filename(
        self, code: str, context_bundle: ContextBundle, **kwargs: Any
    ) -> str:
        """Replace the JSON file name placeholder.

        Uses the ``name`` attribute of the ``temp_json_path`` keyword
        argument as the replacement text.

        Raises:
            ValueError: If ``temp_json_path`` is missing or None.
        """
        temp_json_path = kwargs.get("temp_json_path")
        if temp_json_path is None:
            raise ValueError("temp_json_path is required for JSON filename replacement")
        return code.replace("{{JSON_FILE_NAME_PLACEHOLDER}}", temp_json_path.name)

    def _replace_ir_override_setup(
        self, code: str, context_bundle: ContextBundle, **kwargs: Any
    ) -> str:
        """Replace the IR override setup placeholder.

        In any mode other than OVERRIDE_TTIR the placeholder is replaced with
        an empty string. In OVERRIDE_TTIR mode it is replaced with a snippet
        that writes the TTIR stored in the compilation JSON to a temporary
        file and monkeypatches ``triton.autotune`` to use it.

        Raises:
            ValueError: If OVERRIDE_TTIR mode is requested without a
                ``comp_json_filename`` keyword argument.
        """
        kernel_import = kwargs.get("kernel_import", KernelImportMode.DEFAULT)

        if kernel_import != KernelImportMode.OVERRIDE_TTIR:
            return code.replace("# {{IR_OVERRIDE_SETUP_PLACEHOLDER}}", "")

        comp_json_filename = kwargs.get("comp_json_filename")
        if not comp_json_filename:
            raise ValueError("comp_json_filename is required for OVERRIDE_TTIR mode")

        # The snippet imports json/tempfile/Path itself so that it does not
        # depend on which modules the surrounding template happens to import.
        # Doubled braces are literal braces in the emitted code.
        setup_code = f'''
def create_ttir_tempfile():
    """Extract TTIR from compilation event and create temporary file."""
    import json
    import tempfile
    from pathlib import Path

    script_dir = Path(__file__).resolve().parent
    comp_json_file = script_dir / "{comp_json_filename}"

    with open(comp_json_file, 'r') as f:
        comp_data = json.load(f)

    # Extract TTIR content
    kernel_name = comp_data['payload']['metadata']['name']
    ttir_key = f"{{kernel_name}}.ttir"
    ttir_content = comp_data['payload']['file_content'][ttir_key]

    # Create temporary file
    temp_file = tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.ttir',
        delete=False,
        prefix=f'{{kernel_name}}_'
    )
    temp_file.write(ttir_content)
    temp_file.close()
    return temp_file.name


# Monkeypatch triton.autotune to use our TTIR
_ttir_file = create_ttir_tempfile()
_original_autotune = None

def _patched_autotune(configs, key=None, **kwargs):
    """Patched autotune that uses our TTIR file."""
    import triton
    # Replace configs with our single config using ir_override
    new_configs = [triton.Config(kwargs={{}}, ir_override=_ttir_file)]
    # Call original autotune with our config
    return _original_autotune(new_configs, key=[], **kwargs)

# Apply the monkeypatch before importing the kernel
import triton
_original_autotune = triton.autotune
triton.autotune = _patched_autotune
'''

        return code.replace("# {{IR_OVERRIDE_SETUP_PLACEHOLDER}}", setup_code)

    def _replace_kernel_syspath(
        self, code: str, context_bundle: ContextBundle, **kwargs: Any
    ) -> str:
        """Replace the kernel sys.path placeholder.

        DEFAULT mode inserts the first value returned by
        ``_generate_import_statements``; COPY and OVERRIDE_TTIR insert an
        explanatory comment instead.

        Raises:
            ValueError: If ``kernel_import`` is not a known mode.
        """
        kernel_import = kwargs.get("kernel_import", KernelImportMode.DEFAULT)

        if kernel_import == KernelImportMode.DEFAULT:
            sys_stmt, _ = _generate_import_statements(context_bundle.kernel_info)
            return code.replace("# {{KERNEL_SYSPATH_PLACEHOLDER}}", sys_stmt)
        elif kernel_import == KernelImportMode.COPY:
            comment = (
                "# Kernel sys.path setup skipped - kernel source code embedded below"
            )
            return code.replace("# {{KERNEL_SYSPATH_PLACEHOLDER}}", comment)
        elif kernel_import == KernelImportMode.OVERRIDE_TTIR:
            comment = "# Kernel sys.path setup skipped - using IR override mode"
            return code.replace("# {{KERNEL_SYSPATH_PLACEHOLDER}}", comment)
        else:
            raise ValueError(f"Unknown kernel_import mode: {kernel_import}")

    def _replace_kernel_import(
        self, code: str, context_bundle: ContextBundle, **kwargs: Any
    ) -> str:
        """Replace the kernel import placeholder.

        DEFAULT mode inserts the second value returned by
        ``_generate_import_statements``. COPY mode embeds a fixed set of
        imports, the kernel source code, and an ``imported_kernel_function``
        alias. OVERRIDE_TTIR mode inserts an explanatory comment.

        Raises:
            ValueError: If COPY mode is requested but the kernel source code
                or function name is empty, or if ``kernel_import`` is not a
                known mode.
        """
        kernel_import = kwargs.get("kernel_import", KernelImportMode.DEFAULT)

        if kernel_import == KernelImportMode.DEFAULT:
            _, import_statement = _generate_import_statements(
                context_bundle.kernel_info
            )
            return code.replace("# {{KERNEL_IMPORT_PLACEHOLDER}}", import_statement)
        elif kernel_import == KernelImportMode.COPY:
            source_code = context_bundle.kernel_info.source_code
            func_name = context_bundle.kernel_info.function_name

            if not source_code or not source_code.strip():
                raise ValueError("Kernel source code is empty, cannot use 'copy' mode")
            if not func_name:
                raise ValueError(
                    "Cannot determine kernel function name for 'copy' mode"
                )

            # Add common imports needed for most Triton kernels
            import_lines = [
                "import torch",
                "import numpy as np",
                "import triton",
                "import triton.language as tl",
                "",
            ]

            # Combine: imports + kernel source code + alias
            embedded_code = "\n".join(import_lines)
            embedded_code += "\n" + source_code
            embedded_code += f"\n\n# Use kernel function directly\nimported_kernel_function = {func_name}"

            return code.replace("# {{KERNEL_IMPORT_PLACEHOLDER}}", embedded_code)
        elif kernel_import == KernelImportMode.OVERRIDE_TTIR:
            comment = "# Kernel import skipped - using IR override mode with TTIR"
            return code.replace("# {{KERNEL_IMPORT_PLACEHOLDER}}", comment)
        else:
            raise ValueError(f"Unknown kernel_import mode: {kernel_import}")

    def _replace_kernel_invocation(
        self, code: str, context_bundle: ContextBundle, **kwargs: Any
    ) -> str:
        """Replace the kernel invocation placeholder.

        The kernel source code is parsed into positional and keyword
        arguments, which are then turned into the invocation snippet.
        """
        source_code = context_bundle.kernel_info.source_code
        pos_args, kw_args = _parse_kernel_signature(source_code)
        invocation_snippet = _generate_invocation_snippet(pos_args, kw_args)
        return code.replace("# {{KERNEL_INVOCATION_PLACEHOLDER}}", invocation_snippet)
@@ -6,6 +6,7 @@ It contains a smallest testing example for a Triton kernel.
6
6
  import gzip
7
7
  import hashlib
8
8
  import importlib
9
+ import importlib.util
9
10
  import io
10
11
  import json
11
12
  import logging
@@ -16,6 +17,8 @@ from typing import Union
16
17
 
17
18
  import torch
18
19
 
20
+ # {{IR_OVERRIDE_SETUP_PLACEHOLDER}}
21
+
19
22
  # {{KERNEL_SYSPATH_PLACEHOLDER}}
20
23
 
21
24
  # {{KERNEL_IMPORT_PLACEHOLDER}}
@@ -0,0 +1,18 @@
1
+ from enum import Enum
2
+
3
+
4
class KernelImportMode(str, Enum):
    """
    Kernel import strategy for reproducer generation.

    Inherits from str to allow direct string comparison and use in argparse.
    ``str()`` of a member returns its value (e.g. ``"override-ttir"``), so
    the text argparse shows for ``choices`` matches what is typed on the
    command line.

    Attributes:
        DEFAULT: Import kernel from original file (current behavior).
        COPY: Embed kernel source code directly in reproducer.
        OVERRIDE_TTIR: Use TTIR from compilation event with monkeypatch.
    """

    DEFAULT = "default"
    COPY = "copy"
    OVERRIDE_TTIR = "override-ttir"

    def __str__(self) -> str:
        """Return the member's value instead of ``KernelImportMode.NAME``."""
        # Enum.__str__ would yield "KernelImportMode.DEFAULT", which is not a
        # string that KernelImportMode(...) accepts back.
        return self.value