tritonparse 0.2.4.dev20251008071501__tar.gz → 0.2.4.dev20251009071511__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tritonparse might be problematic. Click here for more details.
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.github/workflows/test.yml +1 -7
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/PKG-INFO +43 -3
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/README.md +42 -2
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/test_tritonparse.py +275 -97
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse.egg-info/PKG-INFO +43 -3
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse.egg-info/SOURCES.txt +0 -2
- tritonparse-0.2.4.dev20251008071501/.ci/install-triton-kernels.sh +0 -87
- tritonparse-0.2.4.dev20251008071501/tests/example_output/repro/repro_context_20250816192455.json +0 -448
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.ci/README.md +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.ci/install-project.sh +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.ci/install-triton.sh +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.ci/run-tests.sh +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.ci/setup.sh +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.github/PAGES_SETUP.md +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.github/workflows/deploy-pages-standalone.yml +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.github/workflows/deploy-pages.yml +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.github/workflows/nightly-pypi.yml +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/.gitignore +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/CHANGELOG.md +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/CODE_OF_CONDUCT.md +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/CONTRIBUTING.md +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/LICENSE +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/Makefile +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/__init__.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/docs/README.md +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/docs/screenshots/code-comparison.png +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/docs/screenshots/kernel-overview.png +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/pyproject.toml +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/run.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/setup.cfg +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/README.md +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/__init__.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/example_output/logs/dedicated_log_triton_trace_findhao_.ndjson +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/example_output/parsed_output/dedicated_log_triton_trace_findhao__mapped.ndjson.gz +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/example_output/parsed_output/f0_fc0_a0_cai-.ndjson.gz +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/example_output/parsed_output/log_file_list.json +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/example_output/parsed_output_complex/dedicated_log_triton_trace_findhao__mapped.ndjson.gz +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/example_output/parsed_output_complex/log_file_list.json +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tests/test_add.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/__init__.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/__main__.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/cli.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/common.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/context_manager.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/event_diff.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/extract_source_mappings.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/ir_parser.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/mapper.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/reproducer/__init__.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/reproducer/cli.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/reproducer/ingestion/ndjson.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/reproducer/orchestrator.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/reproducer/placeholder_replacer.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/reproducer/templates/__init__.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/reproducer/templates/example.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/reproducer/templates/loader.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/reproducer/utils.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/shared_vars.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/source_type.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/sourcemap_utils.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/structured_logging.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/tools/__init__.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/tools/decompress_bin_ndjson.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/tools/disasm.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/tools/format_fix.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/tools/load_tensor.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/tools/prettify_ndjson.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/tools/readme.md +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/tp_logger.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/trace_processor.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse/utils.py +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse.egg-info/dependency_links.txt +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse.egg-info/entry_points.txt +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse.egg-info/requires.txt +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/tritonparse.egg-info/top_level.txt +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/eslint.config.js +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/index.html +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/package-lock.json +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/package.json +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/postcss.config.js +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/public/dedicated_log_triton_trace_findhao__mapped.ndjson.gz +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/public/f0_fc0_a0_cai-.ndjson +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/public/favicon.ico +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/public/logo.svg +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/scripts/inline-html.js +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/App.css +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/App.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/assets/react.svg +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/ArgumentViewer.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/Callstack.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/CodeComparisonView.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/CodeViewer.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/CompilationInfo.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/CopyCodeButton.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/DataSourceSelector.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/DiffComparisonView.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/DiffViewer.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/ExternalLink.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/SingleCodeViewer.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/StackDiffViewer.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/ToggleSwitch.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/TritonIRs.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/components/WelcomeScreen.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/context/FileDiffSession.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/index.css +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/main.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/pages/CodeView.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/pages/FileDiffView.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/pages/KernelOverview.tsx +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/utils/dataLoader.ts +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/utils/fbDetection.ts +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/utils/safeImport.ts +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/utils/tensor.ts +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/src/vite-env.d.ts +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/tailwind.config.js +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/tsconfig.app.json +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/tsconfig.json +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/tsconfig.node.json +0 -0
- {tritonparse-0.2.4.dev20251008071501 → tritonparse-0.2.4.dev20251009071511}/website/vite.config.ts +0 -0
|
@@ -142,7 +142,7 @@ jobs:
|
|
|
142
142
|
TRITON_COMMIT: ${{ steps.triton-commit.outputs.commit }}
|
|
143
143
|
run: |
|
|
144
144
|
bash .ci/install-triton.sh
|
|
145
|
-
|
|
145
|
+
|
|
146
146
|
- name: Install project dependencies
|
|
147
147
|
env:
|
|
148
148
|
CONDA_ENV: tritonparse
|
|
@@ -192,12 +192,6 @@ jobs:
|
|
|
192
192
|
run: |
|
|
193
193
|
bash .ci/setup.sh
|
|
194
194
|
|
|
195
|
-
- name: Install Triton kernels
|
|
196
|
-
env:
|
|
197
|
-
CONDA_ENV: tritonparse-pip
|
|
198
|
-
run: |
|
|
199
|
-
bash .ci/install-triton-kernels.sh
|
|
200
|
-
|
|
201
195
|
- name: Install project dependencies
|
|
202
196
|
env:
|
|
203
197
|
CONDA_ENV: tritonparse-pip
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tritonparse
|
|
3
|
-
Version: 0.2.4.dev20251008071501
|
|
3
|
+
Version: 0.2.4.dev20251009071511
|
|
4
4
|
Summary: TritonParse: A Compiler Tracer, Visualizer, and mini-Reproducer Generator for Triton Kernels
|
|
5
5
|
Author-email: Yueming Hao <yhao@meta.com>
|
|
6
6
|
License-Expression: BSD-3-Clause
|
|
@@ -34,13 +34,18 @@ Dynamic: license-file
|
|
|
34
34
|
- **📝 Multi-format IR Support** - View TTGIR, TTIR, LLIR, PTX, and AMDGCN
|
|
35
35
|
- **🎯 Interactive Code Views** - Click-to-highlight corresponding lines across IR stages
|
|
36
36
|
|
|
37
|
+
### 🔧 Reproducer & Debugging Tools
|
|
38
|
+
- **🔄 Standalone Script Generation** - Extract any kernel into a self-contained Python script
|
|
39
|
+
- **💾 Tensor Data Reconstruction** - Preserve actual tensor data or use statistical approximation
|
|
40
|
+
- **🎯 Custom Templates** - Flexible reproducer templates for different workflows
|
|
41
|
+
- **🐛 Bug Isolation** - Share reproducible test cases for debugging and collaboration
|
|
42
|
+
|
|
37
43
|
### 📊 Structured Logging & Analysis
|
|
38
44
|
- **📝 Compilation & Launch Tracing** - Capture detailed events with source mapping
|
|
39
45
|
- **🔍 Stack Trace Integration** - Full Python stack traces for debugging
|
|
40
46
|
- **📈 Metadata Extraction** - Comprehensive kernel statistics
|
|
41
47
|
|
|
42
48
|
### 🛠️ Developer Tools
|
|
43
|
-
- **🔧 Reproducer Generation** - Generate standalone Python scripts to reproduce kernels
|
|
44
49
|
- **🌐 Browser-based Interface** - No installation required, works in your browser
|
|
45
50
|
- **🔒 Privacy-first** - All processing happens locally, no data uploaded
|
|
46
51
|
|
|
@@ -87,6 +92,41 @@ tritonparse.utils.unified_parse("./logs/", out="./parsed_output")
|
|
|
87
92
|
|
|
88
93
|
> **🔒 Privacy Note**: Your trace files are processed entirely in your browser - nothing is uploaded to any server!
|
|
89
94
|
|
|
95
|
+
### 3. Generate Reproducers (Optional)
|
|
96
|
+
|
|
97
|
+
Extract any kernel into a standalone, executable Python script for debugging or testing:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
# Generate reproducer from first launch event
|
|
101
|
+
tritonparse reproduce ./parsed_output/trace.ndjson.gz --line 2 --out-dir repro_output
|
|
102
|
+
|
|
103
|
+
# Run the generated reproducer
|
|
104
|
+
cd repro_output/<kernel_name>/
|
|
105
|
+
python repro_*.py
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**Python API:**
|
|
109
|
+
```python
|
|
110
|
+
from tritonparse.reproducer.orchestrator import reproduce
|
|
111
|
+
|
|
112
|
+
result = reproduce(
|
|
113
|
+
input_path="./parsed_output/trace.ndjson.gz",
|
|
114
|
+
line_index=1, # Which launch event (1-based)
|
|
115
|
+
out_dir="repro_output"
|
|
116
|
+
)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
<details>
|
|
120
|
+
<summary>🎯 Common Reproducer Use Cases (click to expand)</summary>
|
|
121
|
+
|
|
122
|
+
- **🐛 Bug Isolation**: Extract a failing kernel into a minimal standalone script
|
|
123
|
+
- **⚡ Performance Testing**: Benchmark specific kernels without running the full application
|
|
124
|
+
- **🤝 Team Collaboration**: Share reproducible test cases with colleagues or in bug reports
|
|
125
|
+
- **📊 Regression Testing**: Compare kernel behavior and performance across different versions
|
|
126
|
+
- **🔍 Deep Debugging**: Modify and experiment with kernel parameters in isolation
|
|
127
|
+
|
|
128
|
+
</details>
|
|
129
|
+
|
|
90
130
|
## 🛠️ Installation
|
|
91
131
|
|
|
92
132
|
**For basic usage (trace generation):**
|
|
@@ -117,7 +157,7 @@ pip install triton
|
|
|
117
157
|
|----------|-------------|
|
|
118
158
|
| **[🏠 Wiki Home](https://github.com/meta-pytorch/tritonparse/wiki)** | Complete documentation and quick navigation |
|
|
119
159
|
| **[📦 Installation](https://github.com/meta-pytorch/tritonparse/wiki/01.-Installation)** | Setup guide for all scenarios |
|
|
120
|
-
| **[📋 Usage Guide](https://github.com/meta-pytorch/tritonparse/wiki/02.-Usage-Guide)** | Complete workflow,
|
|
160
|
+
| **[📋 Usage Guide](https://github.com/meta-pytorch/tritonparse/wiki/02.-Usage-Guide)** | Complete workflow, reproducer generation, and examples |
|
|
121
161
|
| **[🌐 Web Interface](https://github.com/meta-pytorch/tritonparse/wiki/03.-Web-Interface-Guide)** | Master the visualization interface |
|
|
122
162
|
| **[🔧 Developer Guide](https://github.com/meta-pytorch/tritonparse/wiki/04.-Developer-Guide)** | Contributing and architecture overview |
|
|
123
163
|
| **[📝 Code Formatting](https://github.com/meta-pytorch/tritonparse/wiki/05.-Code-Formatting)** | Formatting standards and tools |
|
|
@@ -16,13 +16,18 @@
|
|
|
16
16
|
- **📝 Multi-format IR Support** - View TTGIR, TTIR, LLIR, PTX, and AMDGCN
|
|
17
17
|
- **🎯 Interactive Code Views** - Click-to-highlight corresponding lines across IR stages
|
|
18
18
|
|
|
19
|
+
### 🔧 Reproducer & Debugging Tools
|
|
20
|
+
- **🔄 Standalone Script Generation** - Extract any kernel into a self-contained Python script
|
|
21
|
+
- **💾 Tensor Data Reconstruction** - Preserve actual tensor data or use statistical approximation
|
|
22
|
+
- **🎯 Custom Templates** - Flexible reproducer templates for different workflows
|
|
23
|
+
- **🐛 Bug Isolation** - Share reproducible test cases for debugging and collaboration
|
|
24
|
+
|
|
19
25
|
### 📊 Structured Logging & Analysis
|
|
20
26
|
- **📝 Compilation & Launch Tracing** - Capture detailed events with source mapping
|
|
21
27
|
- **🔍 Stack Trace Integration** - Full Python stack traces for debugging
|
|
22
28
|
- **📈 Metadata Extraction** - Comprehensive kernel statistics
|
|
23
29
|
|
|
24
30
|
### 🛠️ Developer Tools
|
|
25
|
-
- **🔧 Reproducer Generation** - Generate standalone Python scripts to reproduce kernels
|
|
26
31
|
- **🌐 Browser-based Interface** - No installation required, works in your browser
|
|
27
32
|
- **🔒 Privacy-first** - All processing happens locally, no data uploaded
|
|
28
33
|
|
|
@@ -69,6 +74,41 @@ tritonparse.utils.unified_parse("./logs/", out="./parsed_output")
|
|
|
69
74
|
|
|
70
75
|
> **🔒 Privacy Note**: Your trace files are processed entirely in your browser - nothing is uploaded to any server!
|
|
71
76
|
|
|
77
|
+
### 3. Generate Reproducers (Optional)
|
|
78
|
+
|
|
79
|
+
Extract any kernel into a standalone, executable Python script for debugging or testing:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
# Generate reproducer from first launch event
|
|
83
|
+
tritonparse reproduce ./parsed_output/trace.ndjson.gz --line 2 --out-dir repro_output
|
|
84
|
+
|
|
85
|
+
# Run the generated reproducer
|
|
86
|
+
cd repro_output/<kernel_name>/
|
|
87
|
+
python repro_*.py
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**Python API:**
|
|
91
|
+
```python
|
|
92
|
+
from tritonparse.reproducer.orchestrator import reproduce
|
|
93
|
+
|
|
94
|
+
result = reproduce(
|
|
95
|
+
input_path="./parsed_output/trace.ndjson.gz",
|
|
96
|
+
line_index=1, # Which launch event (1-based)
|
|
97
|
+
out_dir="repro_output"
|
|
98
|
+
)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
<details>
|
|
102
|
+
<summary>🎯 Common Reproducer Use Cases (click to expand)</summary>
|
|
103
|
+
|
|
104
|
+
- **🐛 Bug Isolation**: Extract a failing kernel into a minimal standalone script
|
|
105
|
+
- **⚡ Performance Testing**: Benchmark specific kernels without running the full application
|
|
106
|
+
- **🤝 Team Collaboration**: Share reproducible test cases with colleagues or in bug reports
|
|
107
|
+
- **📊 Regression Testing**: Compare kernel behavior and performance across different versions
|
|
108
|
+
- **🔍 Deep Debugging**: Modify and experiment with kernel parameters in isolation
|
|
109
|
+
|
|
110
|
+
</details>
|
|
111
|
+
|
|
72
112
|
## 🛠️ Installation
|
|
73
113
|
|
|
74
114
|
**For basic usage (trace generation):**
|
|
@@ -99,7 +139,7 @@ pip install triton
|
|
|
99
139
|
|----------|-------------|
|
|
100
140
|
| **[🏠 Wiki Home](https://github.com/meta-pytorch/tritonparse/wiki)** | Complete documentation and quick navigation |
|
|
101
141
|
| **[📦 Installation](https://github.com/meta-pytorch/tritonparse/wiki/01.-Installation)** | Setup guide for all scenarios |
|
|
102
|
-
| **[📋 Usage Guide](https://github.com/meta-pytorch/tritonparse/wiki/02.-Usage-Guide)** | Complete workflow,
|
|
142
|
+
| **[📋 Usage Guide](https://github.com/meta-pytorch/tritonparse/wiki/02.-Usage-Guide)** | Complete workflow, reproducer generation, and examples |
|
|
103
143
|
| **[🌐 Web Interface](https://github.com/meta-pytorch/tritonparse/wiki/03.-Web-Interface-Guide)** | Master the visualization interface |
|
|
104
144
|
| **[🔧 Developer Guide](https://github.com/meta-pytorch/tritonparse/wiki/04.-Developer-Guide)** | Contributing and architecture overview |
|
|
105
145
|
| **[📝 Code Formatting](https://github.com/meta-pytorch/tritonparse/wiki/05.-Code-Formatting)** | Formatting standards and tools |
|
|
@@ -7,7 +7,6 @@ TORCHINDUCTOR_FX_GRAPH_CACHE=0 TRITONPARSE_DEBUG=1 python -m unittest tests.test
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
import gzip
|
|
10
|
-
import importlib.util
|
|
11
10
|
import json
|
|
12
11
|
import os
|
|
13
12
|
import shutil
|
|
@@ -33,8 +32,6 @@ from tritonparse.shared_vars import TEST_KEEP_OUTPUT
|
|
|
33
32
|
from tritonparse.structured_logging import convert, extract_python_source_info
|
|
34
33
|
from tritonparse.tools.disasm import is_nvdisasm_available
|
|
35
34
|
|
|
36
|
-
HAS_TRITON_KERNELS = importlib.util.find_spec("triton_kernels") is not None
|
|
37
|
-
|
|
38
35
|
|
|
39
36
|
def create_fresh_triton_cache():
|
|
40
37
|
"""Create a fresh Triton cache directory and return cache management context"""
|
|
@@ -108,7 +105,6 @@ def clear_all_caches(*kernels):
|
|
|
108
105
|
# Reset torch compiler state
|
|
109
106
|
torch.compiler.reset()
|
|
110
107
|
torch._dynamo.reset()
|
|
111
|
-
torch._inductor.metrics.reset()
|
|
112
108
|
print("✓ Reset torch compiler, dynamo, and inductor state")
|
|
113
109
|
|
|
114
110
|
# Clear Triton kernel device caches for all provided kernels
|
|
@@ -1053,99 +1049,6 @@ class TestTritonparseCUDA(unittest.TestCase):
|
|
|
1053
1049
|
print("✓ Cleaned up temporary directory")
|
|
1054
1050
|
tritonparse.structured_logging.clear_logging_config()
|
|
1055
1051
|
|
|
1056
|
-
@unittest.skipUnless(torch.cuda.is_available(), "CUDA not available")
|
|
1057
|
-
@unittest.skipUnless(HAS_TRITON_KERNELS, "triton_kernels not installed")
|
|
1058
|
-
def test_triton_kernels_Tensor(self):
|
|
1059
|
-
from triton_kernels.topk_details._topk_forward import _topk_forward
|
|
1060
|
-
from tritonparse.reproducer import utils as reproducer_utils
|
|
1061
|
-
|
|
1062
|
-
input_json_path = os.path.join(
|
|
1063
|
-
os.path.dirname(__file__),
|
|
1064
|
-
"example_output",
|
|
1065
|
-
"repro",
|
|
1066
|
-
"repro_context_20250816192455.json",
|
|
1067
|
-
)
|
|
1068
|
-
grid, args_dict = reproducer_utils.create_args_from_json(input_json_path)
|
|
1069
|
-
print("Generated kernel arguments dictionary:")
|
|
1070
|
-
for name, arg in args_dict.items():
|
|
1071
|
-
print(f" {name}: {arg}")
|
|
1072
|
-
print(f"Grid: {grid}")
|
|
1073
|
-
|
|
1074
|
-
# Set up test environment
|
|
1075
|
-
temp_dir = tempfile.mkdtemp()
|
|
1076
|
-
temp_dir_logs = os.path.join(temp_dir, "logs")
|
|
1077
|
-
temp_dir_parsed = os.path.join(temp_dir, "parsed_output")
|
|
1078
|
-
os.makedirs(temp_dir_logs, exist_ok=True)
|
|
1079
|
-
os.makedirs(temp_dir_parsed, exist_ok=True)
|
|
1080
|
-
print(f"Temporary directory: {temp_dir}")
|
|
1081
|
-
|
|
1082
|
-
# Initialize logging
|
|
1083
|
-
tritonparse.structured_logging.init(temp_dir_logs, enable_trace_launch=True)
|
|
1084
|
-
try:
|
|
1085
|
-
_topk_forward[tuple(grid)](
|
|
1086
|
-
args_dict["X"],
|
|
1087
|
-
args_dict["stride_xm"],
|
|
1088
|
-
args_dict["Yv"],
|
|
1089
|
-
args_dict["Yi"],
|
|
1090
|
-
args_dict["stride_ym"],
|
|
1091
|
-
args_dict["USE_PROVIDED_INDX"],
|
|
1092
|
-
args_dict["Bits"],
|
|
1093
|
-
args_dict["stride_rm"],
|
|
1094
|
-
args_dict["stride_rn"],
|
|
1095
|
-
args_dict["n_rows"],
|
|
1096
|
-
args_dict["n_expts_tot"],
|
|
1097
|
-
args_dict["S"],
|
|
1098
|
-
args_dict["BLOCK_S"],
|
|
1099
|
-
args_dict["s_blocks"],
|
|
1100
|
-
args_dict["APPLY_SOFTMAX"],
|
|
1101
|
-
args_dict["BLOCK_M"],
|
|
1102
|
-
args_dict["N_EXPTS_PAD"],
|
|
1103
|
-
args_dict["N_EXPTS_ACT"],
|
|
1104
|
-
args_dict["BLOCK_N"],
|
|
1105
|
-
)
|
|
1106
|
-
torch.cuda.synchronize()
|
|
1107
|
-
|
|
1108
|
-
# Read the only ndjson file under temp_dir_logs/logs/
|
|
1109
|
-
ndjson_files = [
|
|
1110
|
-
f for f in os.listdir(temp_dir_logs) if f.endswith(".ndjson")
|
|
1111
|
-
]
|
|
1112
|
-
assert (
|
|
1113
|
-
len(ndjson_files) == 1
|
|
1114
|
-
), f"Expected exactly one ndjson in {temp_dir_logs}, found {len(ndjson_files)}: {ndjson_files}"
|
|
1115
|
-
|
|
1116
|
-
ndjson_path = os.path.join(temp_dir_logs, ndjson_files[0])
|
|
1117
|
-
launches = []
|
|
1118
|
-
with open(ndjson_path, "r") as f:
|
|
1119
|
-
for line_num, line in enumerate(f, 1):
|
|
1120
|
-
try:
|
|
1121
|
-
event = json.loads(line.strip())
|
|
1122
|
-
if event.get("event_type") == "launch":
|
|
1123
|
-
launches.append(event)
|
|
1124
|
-
except json.JSONDecodeError as e:
|
|
1125
|
-
self.fail(f"JSON decode error at line {line_num}: {e}")
|
|
1126
|
-
|
|
1127
|
-
assert (
|
|
1128
|
-
len(launches) == 1
|
|
1129
|
-
), f"Expected exactly one launch event, found {len(launches)}"
|
|
1130
|
-
|
|
1131
|
-
launch = launches[0]
|
|
1132
|
-
extracted_args = launch.get("extracted_args", {})
|
|
1133
|
-
assert "X" in extracted_args, "Missing 'X' in extracted_args"
|
|
1134
|
-
assert (
|
|
1135
|
-
extracted_args["X"].get("type") == "triton_kernels.tensor.Tensor"
|
|
1136
|
-
), f"X.type is {extracted_args['X'].get('type')}"
|
|
1137
|
-
|
|
1138
|
-
finally:
|
|
1139
|
-
# Clean up
|
|
1140
|
-
if TEST_KEEP_OUTPUT:
|
|
1141
|
-
print(
|
|
1142
|
-
f"✓ Preserving temporary directory (TEST_KEEP_OUTPUT=1): {temp_dir}"
|
|
1143
|
-
)
|
|
1144
|
-
else:
|
|
1145
|
-
shutil.rmtree(temp_dir)
|
|
1146
|
-
print("✓ Cleaned up temporary directory")
|
|
1147
|
-
tritonparse.structured_logging.clear_logging_config()
|
|
1148
|
-
|
|
1149
1052
|
@unittest.skipIf(is_fbcode(), "Skip in internal FB environment")
|
|
1150
1053
|
@unittest.skipUnless(torch.cuda.is_available(), "CUDA not available")
|
|
1151
1054
|
def test_reproducer_end_to_end(self):
|
|
@@ -1264,6 +1167,281 @@ class TestTritonparseCUDA(unittest.TestCase):
|
|
|
1264
1167
|
shutil.rmtree(temp_dir)
|
|
1265
1168
|
print("✓ Cleaned up temporary directory")
|
|
1266
1169
|
|
|
1170
|
+
@unittest.skipUnless(torch.cuda.is_available(), "CUDA not available")
|
|
1171
|
+
def test_tensor_blob_manager(self):
|
|
1172
|
+
"""Test TensorBlobManager functionality with context manager"""
|
|
1173
|
+
|
|
1174
|
+
# Setup fresh cache for this test
|
|
1175
|
+
test_cache_dir, prev_cache_dir = self.setup_test_with_fresh_cache()
|
|
1176
|
+
|
|
1177
|
+
# Define a simple kernel that accepts tensor inputs
|
|
1178
|
+
@triton.jit
|
|
1179
|
+
def tensor_input_kernel(
|
|
1180
|
+
input_ptr,
|
|
1181
|
+
output_ptr,
|
|
1182
|
+
n_elements,
|
|
1183
|
+
BLOCK_SIZE: tl.constexpr,
|
|
1184
|
+
):
|
|
1185
|
+
pid = tl.program_id(axis=0)
|
|
1186
|
+
block_start = pid * BLOCK_SIZE
|
|
1187
|
+
offsets = block_start + tl.arange(0, BLOCK_SIZE)
|
|
1188
|
+
mask = offsets < n_elements
|
|
1189
|
+
|
|
1190
|
+
x = tl.load(input_ptr + offsets, mask=mask)
|
|
1191
|
+
y = x * 2.0
|
|
1192
|
+
tl.store(output_ptr + offsets, y, mask=mask)
|
|
1193
|
+
|
|
1194
|
+
def run_kernel(input_tensor):
|
|
1195
|
+
n_elements = input_tensor.numel()
|
|
1196
|
+
output = torch.empty_like(input_tensor)
|
|
1197
|
+
BLOCK_SIZE = 256
|
|
1198
|
+
grid = (triton.cdiv(n_elements, BLOCK_SIZE),)
|
|
1199
|
+
tensor_input_kernel[grid](input_tensor, output, n_elements, BLOCK_SIZE)
|
|
1200
|
+
return output
|
|
1201
|
+
|
|
1202
|
+
def collect_blob_files(manager_dir_path):
|
|
1203
|
+
"""Collect all .bin and .bin.gz files from saved_tensors directory."""
|
|
1204
|
+
saved_tensors_dir = os.path.join(manager_dir_path, "saved_tensors")
|
|
1205
|
+
bin_files = []
|
|
1206
|
+
gz_files = []
|
|
1207
|
+
|
|
1208
|
+
if not os.path.exists(saved_tensors_dir):
|
|
1209
|
+
return bin_files, gz_files
|
|
1210
|
+
|
|
1211
|
+
for subdir in os.listdir(saved_tensors_dir):
|
|
1212
|
+
subdir_path = os.path.join(saved_tensors_dir, subdir)
|
|
1213
|
+
if os.path.isdir(subdir_path):
|
|
1214
|
+
for filename in os.listdir(subdir_path):
|
|
1215
|
+
full_path = os.path.join(subdir_path, filename)
|
|
1216
|
+
if filename.endswith(".bin.gz"):
|
|
1217
|
+
gz_files.append(full_path)
|
|
1218
|
+
elif filename.endswith(".bin"):
|
|
1219
|
+
bin_files.append(full_path)
|
|
1220
|
+
|
|
1221
|
+
return bin_files, gz_files
|
|
1222
|
+
|
|
1223
|
+
def count_all_blobs(manager_dir_path):
    """Return how many blob files (.bin plus .bin.gz) collect_blob_files finds."""
    uncompressed, compressed = collect_blob_files(manager_dir_path)
    return sum(len(paths) for paths in (uncompressed, compressed))
|
|
1227
|
+
|
|
1228
|
+
# Prepare test data
|
|
1229
|
+
torch.manual_seed(0)
|
|
1230
|
+
|
|
1231
|
+
# === Test 1: Mixed tensor sizes with compression threshold ===
|
|
1232
|
+
print("\n=== Test 1: Mixed Tensor Sizes with Compression Threshold ===")
|
|
1233
|
+
temp_output_dir_1 = tempfile.mkdtemp()
|
|
1234
|
+
|
|
1235
|
+
with tritonparse.context_manager.TritonParseManager(
|
|
1236
|
+
enable_trace_launch=True,
|
|
1237
|
+
enable_tensor_blob_storage=True,
|
|
1238
|
+
out=temp_output_dir_1,
|
|
1239
|
+
) as manager:
|
|
1240
|
+
# Test different tensor sizes around the 1MB compression threshold
|
|
1241
|
+
test_cases = [
|
|
1242
|
+
((512,), "Tiny 2KB"), # 2KB < 1MB -> .bin
|
|
1243
|
+
((100 * 1024,), "Medium 400KB"), # 400KB < 1MB -> .bin
|
|
1244
|
+
((5 * 1024 * 1024,), "Large 20MB"), # 20MB > 1MB -> .bin.gz
|
|
1245
|
+
((100 * 1024 * 1024,), "Very large 400MB"), # 400MB > 1MB -> .bin.gz
|
|
1246
|
+
]
|
|
1247
|
+
|
|
1248
|
+
# Create tensors and run kernels
|
|
1249
|
+
for size, desc in test_cases:
|
|
1250
|
+
x = torch.randn(size, device=self.cuda_device, dtype=torch.float32)
|
|
1251
|
+
y = run_kernel(x)
|
|
1252
|
+
y.sum()
|
|
1253
|
+
torch.cuda.synchronize()
|
|
1254
|
+
|
|
1255
|
+
# Collect and verify blob files
|
|
1256
|
+
bin_files, gz_files = collect_blob_files(manager.dir_path)
|
|
1257
|
+
assert len(bin_files) + len(gz_files) > 0, "No blob files found"
|
|
1258
|
+
|
|
1259
|
+
print(f"Found {len(bin_files)} .bin files:")
|
|
1260
|
+
for f in bin_files:
|
|
1261
|
+
print(f" {f} ({os.path.getsize(f)} bytes)")
|
|
1262
|
+
print(f"Found {len(gz_files)} .bin.gz files:")
|
|
1263
|
+
for f in gz_files:
|
|
1264
|
+
print(f" {f} ({os.path.getsize(f)} bytes)")
|
|
1265
|
+
|
|
1266
|
+
# Verify correct number of files: 4 uncompressed (.bin) from the 2 small cases and 4 compressed (.bin.gz) from the 2 large cases
|
|
1267
|
+
assert (
|
|
1268
|
+
len(bin_files) == 4
|
|
1269
|
+
), f"Expected 4 .bin files (2KB, 400KB), got {len(bin_files)}"
|
|
1270
|
+
assert (
|
|
1271
|
+
len(gz_files) == 4
|
|
1272
|
+
), f"Expected 4 .bin.gz files (20MB, 400MB), got {len(gz_files)}"
|
|
1273
|
+
|
|
1274
|
+
print(
|
|
1275
|
+
f"✓ Mixed sizes: {len(bin_files)} uncompressed (.bin), {len(gz_files)} compressed (.bin.gz)"
|
|
1276
|
+
)
|
|
1277
|
+
|
|
1278
|
+
# Verify both formats can be loaded
|
|
1279
|
+
from tritonparse.tools.load_tensor import load_tensor
|
|
1280
|
+
|
|
1281
|
+
if bin_files:
|
|
1282
|
+
loaded = load_tensor(bin_files[0])
|
|
1283
|
+
assert loaded is not None, "Failed to load .bin file"
|
|
1284
|
+
print("✓ Successfully loaded .bin file")
|
|
1285
|
+
|
|
1286
|
+
if gz_files:
|
|
1287
|
+
loaded = load_tensor(gz_files[0])
|
|
1288
|
+
assert loaded is not None, "Failed to load .bin.gz file"
|
|
1289
|
+
print("✓ Successfully loaded .bin.gz file")
|
|
1290
|
+
|
|
1291
|
+
print("✓ Both formats (.bin and .bin.gz) verified")
|
|
1292
|
+
|
|
1293
|
+
# === Test 2: Deduplication ===
|
|
1294
|
+
print("\n=== Test 2: Deduplication ===")
|
|
1295
|
+
temp_output_dir_2 = tempfile.mkdtemp()
|
|
1296
|
+
|
|
1297
|
+
with tritonparse.context_manager.TritonParseManager(
|
|
1298
|
+
enable_trace_launch=True,
|
|
1299
|
+
enable_tensor_blob_storage=True,
|
|
1300
|
+
out=temp_output_dir_2,
|
|
1301
|
+
) as manager:
|
|
1302
|
+
# Use the same tensor multiple times
|
|
1303
|
+
x = torch.randn((512,), device=self.cuda_device, dtype=torch.float32)
|
|
1304
|
+
|
|
1305
|
+
# Run kernel 3 times with same input
|
|
1306
|
+
for i in range(3):
|
|
1307
|
+
y = run_kernel(x)
|
|
1308
|
+
y.sum()
|
|
1309
|
+
torch.cuda.synchronize()
|
|
1310
|
+
|
|
1311
|
+
# Count blob files
|
|
1312
|
+
# Note: The system may save both input and output tensors.
|
|
1313
|
+
# - Input tensor x: reused 3 times → should deduplicate to 1 blob
|
|
1314
|
+
# - Output tensors y: 3 separate allocations → may be 3 blobs (if different) or 1 blob (if identical)
|
|
1315
|
+
# Expected: fewer blobs than total tensor references due to deduplication
|
|
1316
|
+
blob_count = count_all_blobs(manager.dir_path)
|
|
1317
|
+
# With deduplication, we should have significantly fewer blobs than 6 (3 inputs + 3 outputs)
|
|
1318
|
+
assert (
|
|
1319
|
+
blob_count < 6
|
|
1320
|
+
), f"Deduplication should reduce blob count, got {blob_count} for 3 launches"
|
|
1321
|
+
# We expect at least 1 blob (the deduplicated input)
|
|
1322
|
+
assert blob_count >= 1, f"Should have at least 1 blob, got {blob_count}"
|
|
1323
|
+
print(
|
|
1324
|
+
f"✓ Deduplication working: {blob_count} unique blob(s) for 3 launches (< 6 without dedup)"
|
|
1325
|
+
)
|
|
1326
|
+
|
|
1327
|
+
# === Test 3: Quota limit ===
|
|
1328
|
+
print("\n=== Test 3: Quota Limit ===")
|
|
1329
|
+
temp_output_dir_3 = tempfile.mkdtemp()
|
|
1330
|
+
|
|
1331
|
+
# Calculate quota to allow exactly one tensor to be saved
|
|
1332
|
+
# A 10000 element float32 tensor = 10000 * 4 bytes = 40KB
|
|
1333
|
+
# After torch.save serialization, it will be larger (includes metadata)
|
|
1334
|
+
# Compressed size will be smaller for random data (but still substantial)
|
|
1335
|
+
# Set quota to ~60KB to allow first tensor but not second
|
|
1336
|
+
# Note: Random data doesn't compress as well as zeros
|
|
1337
|
+
quota_for_one_tensor = 60 * 1024 # 60KB should fit one serialized tensor
|
|
1338
|
+
|
|
1339
|
+
with tritonparse.context_manager.TritonParseManager(
|
|
1340
|
+
enable_trace_launch=True,
|
|
1341
|
+
enable_tensor_blob_storage=True,
|
|
1342
|
+
tensor_storage_quota=quota_for_one_tensor,
|
|
1343
|
+
out=temp_output_dir_3,
|
|
1344
|
+
) as manager:
|
|
1345
|
+
# Create first tensor - should be saved successfully
|
|
1346
|
+
large_x1 = torch.randn(
|
|
1347
|
+
(10000,), device=self.cuda_device, dtype=torch.float32
|
|
1348
|
+
)
|
|
1349
|
+
y1 = run_kernel(large_x1)
|
|
1350
|
+
y1.sum()
|
|
1351
|
+
torch.cuda.synchronize()
|
|
1352
|
+
|
|
1353
|
+
# Check that first tensor was saved
|
|
1354
|
+
blob_count_after_first = count_all_blobs(manager.dir_path)
|
|
1355
|
+
print(f" Blobs after first kernel launch: {blob_count_after_first}")
|
|
1356
|
+
|
|
1357
|
+
# Create second tensor - should exceed quota and trigger storage disable
|
|
1358
|
+
large_x2 = torch.randn(
|
|
1359
|
+
(10000,), device=self.cuda_device, dtype=torch.float32
|
|
1360
|
+
)
|
|
1361
|
+
y2 = run_kernel(large_x2)
|
|
1362
|
+
y2.sum()
|
|
1363
|
+
torch.cuda.synchronize()
|
|
1364
|
+
|
|
1365
|
+
# Verify quota enforcement
|
|
1366
|
+
blob_count_final = count_all_blobs(manager.dir_path)
|
|
1367
|
+
print(f" Blobs after second kernel launch: {blob_count_final}")
|
|
1368
|
+
|
|
1369
|
+
# We expect at least 1 blob was saved (from first launch)
|
|
1370
|
+
assert (
|
|
1371
|
+
blob_count_after_first >= 1
|
|
1372
|
+
), f"First tensor should be saved, got {blob_count_after_first} blobs"
|
|
1373
|
+
|
|
1374
|
+
# After quota exceeded, no more blobs should be added
|
|
1375
|
+
# (blob_count_final should equal blob_count_after_first or be slightly higher
|
|
1376
|
+
# if some outputs were saved before quota was hit)
|
|
1377
|
+
assert (
|
|
1378
|
+
blob_count_final <= blob_count_after_first + 1
|
|
1379
|
+
), f"Quota should prevent saving many more blobs: first={blob_count_after_first}, final={blob_count_final}"
|
|
1380
|
+
|
|
1381
|
+
print(
|
|
1382
|
+
f"✓ Quota enforced: {blob_count_after_first} blob(s) saved before quota limit"
|
|
1383
|
+
)
|
|
1384
|
+
|
|
1385
|
+
# The test passes if it doesn't crash - storage should be disabled after quota exceeded
|
|
1386
|
+
print("✓ Quota limit test passed (storage disabled when quota exceeded)")
|
|
1387
|
+
|
|
1388
|
+
# Reset global variables to default after Test 3 to avoid polluting Test 4
|
|
1389
|
+
tritonparse.structured_logging.TRITONPARSE_TENSOR_STORAGE_QUOTA = (
|
|
1390
|
+
100 * 1024 * 1024 * 1024
|
|
1391
|
+
) # 100GB default
|
|
1392
|
+
tritonparse.structured_logging.TRITONPARSE_SAVE_TENSOR_BLOBS = (
|
|
1393
|
+
False # Reset to default (disabled)
|
|
1394
|
+
)
|
|
1395
|
+
|
|
1396
|
+
# === Test 4: Disabled storage ===
|
|
1397
|
+
print("\n=== Test 4: Disabled Storage ===")
|
|
1398
|
+
temp_output_dir_4 = tempfile.mkdtemp()
|
|
1399
|
+
|
|
1400
|
+
# When storage is explicitly disabled, don't set quota to avoid confusion
|
|
1401
|
+
with tritonparse.context_manager.TritonParseManager(
|
|
1402
|
+
enable_trace_launch=True,
|
|
1403
|
+
enable_tensor_blob_storage=False, # Explicitly disabled
|
|
1404
|
+
out=temp_output_dir_4,
|
|
1405
|
+
) as manager:
|
|
1406
|
+
x = torch.randn((512,), device=self.cuda_device, dtype=torch.float32)
|
|
1407
|
+
y = run_kernel(x)
|
|
1408
|
+
y.sum()
|
|
1409
|
+
torch.cuda.synchronize()
|
|
1410
|
+
|
|
1411
|
+
# Verify no saved_tensors directory or it's empty
|
|
1412
|
+
total_blobs = count_all_blobs(manager.dir_path)
|
|
1413
|
+
assert (
|
|
1414
|
+
total_blobs == 0
|
|
1415
|
+
), f"Expected no blobs when storage disabled, found {total_blobs}"
|
|
1416
|
+
print("✓ Storage correctly disabled when enable_tensor_blob_storage=False")
|
|
1417
|
+
|
|
1418
|
+
# Clean up all test outputs
|
|
1419
|
+
try:
|
|
1420
|
+
if TEST_KEEP_OUTPUT:
|
|
1421
|
+
print(
|
|
1422
|
+
f"\n✓ Preserving output directories (TEST_KEEP_OUTPUT=1):\n"
|
|
1423
|
+
f" Test 1: {temp_output_dir_1}\n"
|
|
1424
|
+
f" Test 2: {temp_output_dir_2}\n"
|
|
1425
|
+
f" Test 3: {temp_output_dir_3}\n"
|
|
1426
|
+
f" Test 4: {temp_output_dir_4}"
|
|
1427
|
+
)
|
|
1428
|
+
else:
|
|
1429
|
+
for temp_dir in [
|
|
1430
|
+
temp_output_dir_1,
|
|
1431
|
+
temp_output_dir_2,
|
|
1432
|
+
temp_output_dir_3,
|
|
1433
|
+
temp_output_dir_4,
|
|
1434
|
+
]:
|
|
1435
|
+
if os.path.exists(temp_dir):
|
|
1436
|
+
shutil.rmtree(temp_dir)
|
|
1437
|
+
print("✓ Cleaned up all test output directories")
|
|
1438
|
+
except Exception as e:
|
|
1439
|
+
print(f"Warning: Failed to clean up output directories: {e}")
|
|
1440
|
+
|
|
1441
|
+
finally:
|
|
1442
|
+
# Cleanup test-specific cache
|
|
1443
|
+
self.cleanup_test_cache(test_cache_dir, prev_cache_dir)
|
|
1444
|
+
|
|
1267
1445
|
|
|
1268
1446
|
if __name__ == "__main__":
|
|
1269
1447
|
unittest.main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tritonparse
|
|
3
|
-
Version: 0.2.4.dev20251008071501
|
|
3
|
+
Version: 0.2.4.dev20251009071511
|
|
4
4
|
Summary: TritonParse: A Compiler Tracer, Visualizer, and mini-Reproducer Generator for Triton Kernels
|
|
5
5
|
Author-email: Yueming Hao <yhao@meta.com>
|
|
6
6
|
License-Expression: BSD-3-Clause
|
|
@@ -34,13 +34,18 @@ Dynamic: license-file
|
|
|
34
34
|
- **📝 Multi-format IR Support** - View TTGIR, TTIR, LLIR, PTX, and AMDGCN
|
|
35
35
|
- **🎯 Interactive Code Views** - Click-to-highlight corresponding lines across IR stages
|
|
36
36
|
|
|
37
|
+
### 🔧 Reproducer & Debugging Tools
|
|
38
|
+
- **🔄 Standalone Script Generation** - Extract any kernel into a self-contained Python script
|
|
39
|
+
- **💾 Tensor Data Reconstruction** - Preserve actual tensor data or use statistical approximation
|
|
40
|
+
- **🎯 Custom Templates** - Flexible reproducer templates for different workflows
|
|
41
|
+
- **🐛 Bug Isolation** - Share reproducible test cases for debugging and collaboration
|
|
42
|
+
|
|
37
43
|
### 📊 Structured Logging & Analysis
|
|
38
44
|
- **📝 Compilation & Launch Tracing** - Capture detailed events with source mapping
|
|
39
45
|
- **🔍 Stack Trace Integration** - Full Python stack traces for debugging
|
|
40
46
|
- **📈 Metadata Extraction** - Comprehensive kernel statistics
|
|
41
47
|
|
|
42
48
|
### 🛠️ Developer Tools
|
|
43
|
-
- **🔧 Reproducer Generation** - Generate standalone Python scripts to reproduce kernels
|
|
44
49
|
- **🌐 Browser-based Interface** - No installation required, works in your browser
|
|
45
50
|
- **🔒 Privacy-first** - All processing happens locally, no data uploaded
|
|
46
51
|
|
|
@@ -87,6 +92,41 @@ tritonparse.utils.unified_parse("./logs/", out="./parsed_output")
|
|
|
87
92
|
|
|
88
93
|
> **🔒 Privacy Note**: Your trace files are processed entirely in your browser - nothing is uploaded to any server!
|
|
89
94
|
|
|
95
|
+
### 3. Generate Reproducers (Optional)
|
|
96
|
+
|
|
97
|
+
Extract any kernel into a standalone, executable Python script for debugging or testing:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
# Generate reproducer from first launch event
|
|
101
|
+
tritonparse reproduce ./parsed_output/trace.ndjson.gz --line 2 --out-dir repro_output
|
|
102
|
+
|
|
103
|
+
# Run the generated reproducer
|
|
104
|
+
cd repro_output/<kernel_name>/
|
|
105
|
+
python repro_*.py
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**Python API:**
|
|
109
|
+
```python
|
|
110
|
+
from tritonparse.reproducer.orchestrator import reproduce
|
|
111
|
+
|
|
112
|
+
result = reproduce(
|
|
113
|
+
input_path="./parsed_output/trace.ndjson.gz",
|
|
114
|
+
line_index=1, # Which launch event (1-based)
|
|
115
|
+
out_dir="repro_output"
|
|
116
|
+
)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
<details>
|
|
120
|
+
<summary>🎯 Common Reproducer Use Cases (click to expand)</summary>
|
|
121
|
+
|
|
122
|
+
- **🐛 Bug Isolation**: Extract a failing kernel into a minimal standalone script
|
|
123
|
+
- **⚡ Performance Testing**: Benchmark specific kernels without running the full application
|
|
124
|
+
- **🤝 Team Collaboration**: Share reproducible test cases with colleagues or in bug reports
|
|
125
|
+
- **📊 Regression Testing**: Compare kernel behavior and performance across different versions
|
|
126
|
+
- **🔍 Deep Debugging**: Modify and experiment with kernel parameters in isolation
|
|
127
|
+
|
|
128
|
+
</details>
|
|
129
|
+
|
|
90
130
|
## 🛠️ Installation
|
|
91
131
|
|
|
92
132
|
**For basic usage (trace generation):**
|
|
@@ -117,7 +157,7 @@ pip install triton
|
|
|
117
157
|
|----------|-------------|
|
|
118
158
|
| **[🏠 Wiki Home](https://github.com/meta-pytorch/tritonparse/wiki)** | Complete documentation and quick navigation |
|
|
119
159
|
| **[📦 Installation](https://github.com/meta-pytorch/tritonparse/wiki/01.-Installation)** | Setup guide for all scenarios |
|
|
120
|
-
| **[📋 Usage Guide](https://github.com/meta-pytorch/tritonparse/wiki/02.-Usage-Guide)** | Complete workflow,
|
|
160
|
+
| **[📋 Usage Guide](https://github.com/meta-pytorch/tritonparse/wiki/02.-Usage-Guide)** | Complete workflow, reproducer generation, and examples |
|
|
121
161
|
| **[🌐 Web Interface](https://github.com/meta-pytorch/tritonparse/wiki/03.-Web-Interface-Guide)** | Master the visualization interface |
|
|
122
162
|
| **[🔧 Developer Guide](https://github.com/meta-pytorch/tritonparse/wiki/04.-Developer-Guide)** | Contributing and architecture overview |
|
|
123
163
|
| **[📝 Code Formatting](https://github.com/meta-pytorch/tritonparse/wiki/05.-Code-Formatting)** | Formatting standards and tools |
|
|
@@ -10,7 +10,6 @@ pyproject.toml
|
|
|
10
10
|
run.py
|
|
11
11
|
.ci/README.md
|
|
12
12
|
.ci/install-project.sh
|
|
13
|
-
.ci/install-triton-kernels.sh
|
|
14
13
|
.ci/install-triton.sh
|
|
15
14
|
.ci/run-tests.sh
|
|
16
15
|
.ci/setup.sh
|
|
@@ -32,7 +31,6 @@ tests/example_output/parsed_output/f0_fc0_a0_cai-.ndjson.gz
|
|
|
32
31
|
tests/example_output/parsed_output/log_file_list.json
|
|
33
32
|
tests/example_output/parsed_output_complex/dedicated_log_triton_trace_findhao__mapped.ndjson.gz
|
|
34
33
|
tests/example_output/parsed_output_complex/log_file_list.json
|
|
35
|
-
tests/example_output/repro/repro_context_20250816192455.json
|
|
36
34
|
tritonparse/__init__.py
|
|
37
35
|
tritonparse/__main__.py
|
|
38
36
|
tritonparse/cli.py
|