wafer-core 0.1.44__py3-none-any.whl → 0.1.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer_core/lib/trace_compare/__init__.py +12 -0
- wafer_core/lib/trace_compare/graph_formatter.py +263 -0
- wafer_core/lib/trace_compare/graph_formatter_detailed.py +225 -0
- wafer_core/lib/trace_compare/graph_matcher.py +315 -0
- wafer_core/lib/trace_compare/graph_matcher_v2.py +332 -0
- {wafer_core-0.1.44.dist-info → wafer_core-0.1.46.dist-info}/METADATA +1 -1
- {wafer_core-0.1.44.dist-info → wafer_core-0.1.46.dist-info}/RECORD +8 -4
- {wafer_core-0.1.44.dist-info → wafer_core-0.1.46.dist-info}/WHEEL +0 -0
wafer_core/lib/trace_compare/__init__.py
@@ -5,6 +5,8 @@ identifying kernel-level performance differences and fusion opportunities.
 """
 
 from .analyzer import analyze_traces
+# from .api import analyze_trace_pair  # TODO: api.py has unimplemented dependencies
+from .architecture import ArchitectureType, detect_architecture
 from .classifier import Op, classify
 from .formatter import (
     format_csv,
@@ -15,6 +17,9 @@ from .formatter import (
     format_text,
 )
 from .fusion_analyzer import analyze_fusion_differences
+from .graph_formatter import format_graph_comparison_json, format_graph_comparison_text
+from .graph_formatter_detailed import format_graph_comparison_detailed
+from .graph_matcher import match_traces
 from .loader import load_trace
 
 __all__ = [
@@ -22,6 +27,9 @@ __all__ = [
     "classify",
     "load_trace",
     "analyze_traces",
+    # "analyze_trace_pair",  # TODO: not yet implemented
+    "detect_architecture",
+    "ArchitectureType",
     "analyze_fusion_differences",
     "format_text",
     "format_csv",
@@ -29,4 +37,8 @@ __all__ = [
     "format_fusion_text",
     "format_fusion_csv",
     "format_fusion_json",
+    "match_traces",
+    "format_graph_comparison_text",
+    "format_graph_comparison_json",
+    "format_graph_comparison_detailed",
 ]
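
A minimal usage sketch for the newly exported entry point (not part of the diff; the trace paths are hypothetical):

# Hypothetical trace paths. match_traces (re-exported from graph_matcher) returns
# dataclass lists; the format_graph_comparison_* helpers instead take a summary
# dict, a sketch of which follows the graph_formatter.py listing below.
from wafer_core.lib.trace_compare import match_traces

graph_pairs, kernel_matches = match_traces("amd_trace.json", "nvidia_trace.json")
print(f"Matched {len(graph_pairs)} CUDA graph pairs, {len(kernel_matches)} kernel pairs")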
wafer_core/lib/trace_compare/graph_formatter.py
@@ -0,0 +1,263 @@
+"""Graph-based pattern-focused formatter for trace comparison.
+
+Presents results grouped by CUDA graph execution patterns, reducing cognitive load
+by showing 1-7 unique patterns instead of thousands of individual kernel executions.
+"""
+
+from collections import Counter, defaultdict
+from typing import Any
+
+
+def format_graph_comparison_text(result: dict[str, Any], show_all: bool = False) -> str:
+    """Format graph matching results as pattern-focused text report.
+
+    Args:
+        result: Results from graph_matcher.match_traces()
+        show_all: Show all patterns without truncation
+
+    Returns:
+        Formatted text report with pattern-focused UX
+    """
+    lines = []
+    summary = result['summary']
+    graph_pairs = result['graph_pairs']
+
+    # Header
+    lines.append("=" * 80)
+    lines.append("TRACE COMPARISON - GRAPH-BASED ANALYSIS")
+    lines.append("=" * 80)
+    lines.append("")
+
+    # Overview section
+    lines.append("┌" + "─" * 78 + "┐")
+    lines.append("│ OVERVIEW" + " " * 69 + "│")
+    lines.append("├" + "─" * 78 + "┤")
+
+    # Calculate total time from graph pairs
+    amd_total_ms = sum(
+        sum(k.get('dur', 0) for k in pair['amd_kernels'])
+        for pair in graph_pairs
+    ) / 1000
+    nv_total_ms = sum(
+        sum(k.get('dur', 0) for k in pair['nv_kernels'])
+        for pair in graph_pairs
+    ) / 1000
+
+    amd_kernels = summary['total_kernel_pairs']  # Actually shows unique positions
+    nv_kernels = summary['total_kernel_pairs']
+
+    lines.append(f"│ AMD: {amd_kernels:>6,} kernel positions {amd_total_ms:>8.1f}ms" + " " * 25 + "│")
+    lines.append(f"│ NVIDIA: {nv_kernels:>6,} kernel positions {nv_total_ms:>8.1f}ms" + " " * 25 + "│")
+    lines.append(f"│" + " " * 78 + "│")
+    lines.append(f"│ Match rate: {summary['match_rate']:.1f}% "
+                 f"({summary['matched']:,} matched, "
+                 f"{summary['amd_only']:,} AMD-only, "
+                 f"{summary['nv_only']:,} NV-only)" + " " * 5 + "│")
+    lines.append("└" + "─" * 78 + "┘")
+    lines.append("")
+
+    # CUDA Graph Patterns section
+    lines.append("┌" + "─" * 78 + "┐")
+    lines.append("│ CUDA GRAPH PATTERNS (Transformer Layers)" + " " * 36 + "│")
+    lines.append("├" + "─" * 78 + "┤")
+    lines.append(f"│ Total graph executions: {summary['num_graph_pairs']:,}" + " " * 50 + "│")
+
+    # Group patterns by kernel sequence
+    amd_patterns = _group_by_pattern(graph_pairs, 'amd')
+    nv_patterns = _group_by_pattern(graph_pairs, 'nv')
+
+    lines.append(f"│ Unique AMD patterns: {len(amd_patterns)}" + " " * 50 + "│")
+    lines.append(f"│ Unique NVIDIA patterns: {len(nv_patterns)}" + " " * 50 + "│")
+    lines.append("└" + "─" * 78 + "┘")
+    lines.append("")
+
+    # Pattern Details
+    lines.append("=" * 80)
+    lines.append("PATTERN DETAILS")
+    lines.append("=" * 80)
+    lines.append("")
+
+    # Show AMD patterns
+    lines.append("AMD Patterns:")
+    max_patterns = len(amd_patterns) if show_all else min(5, len(amd_patterns))
+    for i, (pattern, executions) in enumerate(list(amd_patterns.items())[:max_patterns], 1):
+        kernel_count = len(executions[0]['amd_kernels'])
+        lines.append(f" Pattern {i}: {len(executions):>4} executions ({kernel_count} kernels each)")
+
+        if i == 1:  # Show detail for main pattern
+            lines.append(f" First few kernels:")
+            first_kernels = sorted(executions[0]['amd_kernels'], key=lambda x: x.get('ts', 0))[:5]
+            for k in first_kernels:
+                name = k.get('name', '')[:60]
+                lines.append(f" - {name}")
+
+    if not show_all and len(amd_patterns) > 5:
+        lines.append(f" ... ({len(amd_patterns) - 5} more patterns)")
+    lines.append("")
+
+    # Show NVIDIA patterns
+    lines.append("NVIDIA Patterns:")
+    max_patterns = len(nv_patterns) if show_all else min(5, len(nv_patterns))
+    for i, (pattern, executions) in enumerate(list(nv_patterns.items())[:max_patterns], 1):
+        kernel_count = len(executions[0]['nv_kernels'])
+        lines.append(f" Pattern {i}: {len(executions):>4} executions ({kernel_count} kernels each)")
+
+        if i == 1:  # Show detail for main pattern
+            lines.append(f" First few kernels:")
+            first_kernels = sorted(executions[0]['nv_kernels'], key=lambda x: x.get('ts', 0))[:5]
+            for k in first_kernels:
+                name = k.get('name', '')[:60]
+                lines.append(f" - {name}")
+
+    if not show_all and len(nv_patterns) > 5:
+        lines.append(f" ... ({len(nv_patterns) - 5} more patterns)")
+    lines.append("")
+
+    # Drilling down into main pattern
+    lines.append("=" * 80)
+    lines.append("MAIN PATTERN COMPARISON (Pattern 1)")
+    lines.append("=" * 80)
+    lines.append("")
+
+    if amd_patterns and nv_patterns:
+        # Get first execution of main patterns
+        amd_main_executions = list(amd_patterns.values())[0]
+        nv_main_executions = list(nv_patterns.values())[0]
+
+        amd_main = amd_main_executions[0]
+        nv_main = nv_main_executions[0]
+
+        # Get kernel type distribution from matches
+        amd_types = Counter()
+        nv_types = Counter()
+
+        for match in amd_main['matches']:
+            if match['status'] in ['MATCH', 'AMD_ONLY']:
+                amd_types[match['amd_type']] += 1
+            if match['status'] in ['MATCH', 'NV_ONLY']:
+                nv_types[match['nv_type']] += 1
+
+        lines.append("Kernel Type Distribution (per execution):")
+        lines.append(f"{'Type':<20} {'AMD':>8} {'NVIDIA':>8} {'Diff':>8}")
+        lines.append("-" * 50)
+
+        all_types = sorted(set(amd_types.keys()) | set(nv_types.keys()))
+        differences = []
+
+        for ktype in all_types:
+            amd_count = amd_types.get(ktype, 0)
+            nv_count = nv_types.get(ktype, 0)
+            diff = amd_count - nv_count
+            diff_str = f"+{diff}" if diff > 0 else str(diff) if diff < 0 else "="
+            lines.append(f"{ktype:<20} {amd_count:>8} {nv_count:>8} {diff_str:>8}")
+
+            if diff != 0:
+                differences.append((ktype, diff))
+
+        lines.append("")
+        lines.append("-" * 80)
+        lines.append("Key Findings:")
+
+        if differences:
+            # Sort by absolute difference
+            differences.sort(key=lambda x: abs(x[1]), reverse=True)
+
+            for ktype, diff in differences[:3]:
+                total_extra = abs(diff) * len(amd_main_executions)
+                if diff > 0:
+                    lines.append(f" • AMD runs {diff:+d} extra {ktype} per execution")
+                    lines.append(f" → {total_extra:,} extra operations across all executions")
+                else:
+                    lines.append(f" • NVIDIA runs {abs(diff)} extra {ktype} per execution")
+                    lines.append(f" → {total_extra:,} extra operations across all executions")
+        else:
+            lines.append(" • Perfect match - kernel types align exactly!")
+
+        lines.append("")
+
+    # Aggregate Statistics
+    lines.append("=" * 80)
+    lines.append("AGGREGATE STATISTICS")
+    lines.append("=" * 80)
+    lines.append("")
+
+    if amd_patterns and nv_patterns:
+        amd_main_executions = list(amd_patterns.values())[0]
+        nv_main_executions = list(nv_patterns.values())[0]
+
+        amd_main_count = len(amd_main_executions)
+        nv_main_count = len(nv_main_executions)
+
+        lines.append(f"Main Pattern (appears {min(amd_main_count, nv_main_count)}x on both platforms):")
+
+        amd_kernels_per = len(amd_main_executions[0]['amd_kernels'])
+        nv_kernels_per = len(nv_main_executions[0]['nv_kernels'])
+
+        lines.append(f" AMD: {amd_kernels_per} kernels × {amd_main_count} executions = {amd_kernels_per * amd_main_count:,} total kernels")
+        lines.append(f" NVIDIA: {nv_kernels_per} kernels × {nv_main_count} executions = {nv_kernels_per * nv_main_count:,} total kernels")
+
+        # Calculate time for main pattern
+        amd_time = sum(
+            sum(k.get('dur', 0) for k in exec['amd_kernels'])
+            for exec in amd_main_executions
+        )
+        nv_time = sum(
+            sum(k.get('dur', 0) for k in exec['nv_kernels'])
+            for exec in nv_main_executions
+        )
+
+        lines.append(f"")
+        lines.append(f" Total time in main pattern:")
+        lines.append(f" AMD: {amd_time/1000:.1f}ms ({amd_time/amd_total_ms/10:.1f}% of total)")
+        lines.append(f" NVIDIA: {nv_time/1000:.1f}ms ({nv_time/nv_total_ms/10:.1f}% of total)")
+
+    lines.append("")
+    return "\n".join(lines)
+
+
+def _group_by_pattern(
+    graph_pairs: list[dict[str, Any]],
+    platform: str
+) -> dict[tuple, list[dict[str, Any]]]:
+    """Group graph executions by their kernel sequence pattern.
+
+    Args:
+        graph_pairs: List of graph pair dictionaries
+        platform: 'amd' or 'nv'
+
+    Returns:
+        Dictionary mapping pattern signatures to list of executions
+    """
+    patterns: dict[tuple, list[dict[str, Any]]] = defaultdict(list)
+
+    kernels_key = f'{platform}_kernels'
+
+    for pair in graph_pairs:
+        kernels = pair[kernels_key]
+        sorted_kernels = sorted(kernels, key=lambda x: x.get('ts', 0))
+
+        # Pattern signature: tuple of kernel names in order
+        signature = tuple(k.get('name', '') for k in sorted_kernels)
+        patterns[signature].append(pair)
+
+    # Sort by frequency (most common first)
+    sorted_patterns = dict(sorted(
+        patterns.items(),
+        key=lambda x: len(x[1]),
+        reverse=True
+    ))
+
+    return sorted_patterns
+
+
+def format_graph_comparison_json(result: dict[str, Any]) -> str:
+    """Format graph matching results as JSON.
+
+    Args:
+        result: Results from graph_matcher.match_traces()
+
+    Returns:
+        JSON string
+    """
+    import json
+    return json.dumps(result, indent=2)
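
For illustration only, a minimal `result` dict that satisfies the keys this formatter reads; the producer of this dict is not shown in this diff, so the shape is inferred and all values are made up:

from wafer_core.lib.trace_compare import format_graph_comparison_text

# Inferred shape: 'summary' totals plus per-graph 'amd_kernels', 'nv_kernels',
# and per-position 'matches'. Hypothetical data, one graph with one kernel pair.
result = {
    "summary": {
        "total_kernel_pairs": 1,
        "match_rate": 100.0,
        "matched": 1,
        "amd_only": 0,
        "nv_only": 0,
        "num_graph_pairs": 1,
    },
    "graph_pairs": [{
        "amd_kernels": [{"name": "triton_poi_fused_silu_0", "ts": 0, "dur": 12}],
        "nv_kernels": [{"name": "triton_poi_fused_silu_0", "ts": 0, "dur": 11}],
        "matches": [{
            "status": "MATCH",
            "amd_type": "SILU", "nv_type": "SILU",
            "amd_name": "triton_poi_fused_silu_0", "nv_name": "triton_poi_fused_silu_0",
        }],
    }],
}
print(format_graph_comparison_text(result))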
wafer_core/lib/trace_compare/graph_formatter_detailed.py
@@ -0,0 +1,225 @@
+"""Detailed kernel-to-kernel formatter for graph matching results.
+
+Shows individual kernel pairs in position order (default) or grouped by operation type.
+"""
+
+from collections import Counter, defaultdict
+from typing import Any
+
+
+def format_graph_comparison_detailed(
+    result: dict[str, Any],
+    show_all: bool = False,
+    group_by_op: bool = False,
+    max_graphs: int = 3,
+) -> str:
+    """Format graph matching results with kernel-to-kernel details.
+
+    Args:
+        result: Results from graph_matcher.match_traces()
+        show_all: Show all kernel pairs without truncation
+        group_by_op: Group kernels by operation type instead of position order
+        max_graphs: Maximum number of graph pairs to show in detail (default: 3)
+
+    Returns:
+        Formatted text report with kernel-to-kernel matching
+    """
+    lines = []
+    summary = result['summary']
+    graph_pairs = result['graph_pairs']
+
+    # Header
+    lines.append("=" * 80)
+    lines.append("TRACE COMPARISON - AMD vs NVIDIA")
+    lines.append("=" * 80)
+    lines.append("")
+
+    # Section 1: Overview
+    lines.append("┏" + "━" * 78 + "┓")
+    lines.append("┃ SECTION 1: OVERVIEW" + " " * 58 + "┃")
+    lines.append("┣" + "━" * 78 + "┫")
+
+    total_graphs = summary['num_graph_pairs']
+    amd_total_kernels = sum(len(pair['amd_kernels']) for pair in graph_pairs)
+    nv_total_kernels = sum(len(pair['nv_kernels']) for pair in graph_pairs)
+
+    lines.append(f"┃ Transformer layer graphs: AMD: {total_graphs} NVIDIA: {total_graphs}" + " " * 27 + "┃")
+    lines.append(f"┃ Graph pairs to compare: {total_graphs}" + " " * 50 + "┃")
+    lines.append(f"┃ Total kernels in graphs: AMD: {amd_total_kernels} NVIDIA: {nv_total_kernels}" + " " * 23 + "┃")
+    lines.append("┗" + "━" * 78 + "┛")
+    lines.append("")
+
+    # Section 2: Non-graph kernels (placeholder - always 0 for transformer layers)
+    lines.append("┏" + "━" * 78 + "┓")
+    lines.append("┃ SECTION 2: NON-GRAPH KERNELS" + " " * 48 + "┃")
+    lines.append("┣" + "━" * 78 + "┫")
+    lines.append("┃ ✓ All kernels are in CUDA graphs" + " " * 44 + "┃")
+    lines.append("┗" + "━" * 78 + "┛")
+    lines.append("")
+
+    # Section 3: Unique patterns count
+    amd_patterns = _group_by_pattern(graph_pairs, 'amd')
+    nv_patterns = _group_by_pattern(graph_pairs, 'nv')
+
+    lines.append("┏" + "━" * 78 + "┓")
+    lines.append("┃ SECTION 3: CUDA GRAPH PATTERNS (Transformer Layers)" + " " * 25 + "┃")
+    lines.append("┣" + "━" * 78 + "┫")
+    lines.append(f"┃ Unique patterns: AMD: {len(amd_patterns)}, NVIDIA: {len(nv_patterns)}" + " " * 42 + "┃")
+    lines.append(f"┃ Total executions: AMD: {total_graphs}, NVIDIA: {total_graphs}" + " " * 32 + "┃")
+    lines.append("┗" + "━" * 78 + "┛")
+    lines.append("")
+
+    # Show representative patterns
+    lines.append("=" * 80)
+    lines.append(f"UNIQUE GRAPH PATTERNS (showing up to {max_graphs})")
+    lines.append("=" * 80)
+    lines.append("")
+
+    num_patterns_to_show = min(max_graphs, len(amd_patterns)) if not show_all else len(amd_patterns)
+
+    for pattern_idx, (amd_pattern, amd_executions) in enumerate(list(amd_patterns.items())[:num_patterns_to_show], 1):
+        lines.append(f"┌─ Pattern {pattern_idx} " + "─" * (73 - len(f"Pattern {pattern_idx}")) + "┐")
+
+        # Find corresponding NVIDIA pattern
+        nv_pattern_idx = min(pattern_idx - 1, len(nv_patterns) - 1)
+        nv_executions = list(nv_patterns.values())[nv_pattern_idx]
+
+        amd_kernels_per = len(amd_executions[0]['amd_kernels'])
+        nv_kernels_per = len(nv_executions[0]['nv_kernels'])
+
+        lines.append(f"│ AMD: {len(amd_executions)} executions × {amd_kernels_per} kernels each" + " " * 35 + "│")
+        lines.append(f"│ NVIDIA: {len(nv_executions)} executions × {nv_kernels_per} kernels each" + " " * 35 + "│")
+        lines.append("└" + "─" * 78 + "┘")
+        lines.append("")
+
+        # Show match quality for this pattern
+        matches = amd_executions[0]['matches']
+        matched = sum(1 for m in matches if m['status'] == 'MATCH')
+        amd_only = sum(1 for m in matches if m['status'] == 'AMD_ONLY')
+        nv_only = sum(1 for m in matches if m['status'] == 'NV_ONLY')
+        mismatch = sum(1 for m in matches if m['status'] == 'MISMATCH')
+
+        match_rate = (matched / len(matches) * 100) if matches else 0
+
+        lines.append(f"Match Quality: {match_rate:.1f}%")
+        lines.append(f" ✓ Matched: {matched}")
+        lines.append(f" ⚠ AMD only: {amd_only} (fusion differences)")
+        lines.append(f" ⚠ NVIDIA only: {nv_only} (fusion differences)")
+        if mismatch > 0:
+            lines.append(f" ✗ Mismatched: {mismatch}")
+        lines.append("")
+
+        # Show kernel details
+        if group_by_op:
+            lines.extend(_format_kernels_grouped_by_op(matches, show_all))
+        else:
+            lines.extend(_format_kernels_in_order(matches, show_all))
+
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def _format_kernels_in_order(matches: list[dict[str, Any]], show_all: bool) -> list[str]:
+    """Format kernels in position order."""
+    lines = []
+    lines.append("Kernel-to-Kernel Comparison (representative execution):")
+    lines.append("")
+    lines.append(f"{'Pos':<4} {'AMD Kernel':<45} {'NVIDIA Kernel':<45} {'Status':<8}")
+    lines.append("-" * 110)
+
+    max_pairs = len(matches) if show_all else min(20, len(matches))
+
+    for idx, match in enumerate(matches[:max_pairs], 1):
+        status_icon = {
+            'MATCH': '✓',
+            'AMD_ONLY': '⚠ AMD',
+            'NV_ONLY': '⚠ NV',
+            'MISMATCH': '✗',
+        }.get(match['status'], '?')
+
+        # Add operation type label
+        if idx == 1 or (idx > 1 and match['amd_type'] != matches[idx-2]['amd_type']):
+            op_type = match['amd_type'] if match['amd_type'] != '-' else match['nv_type']
+            lines.append("")
+            lines.append(f"[{op_type}]")
+
+        amd_name = match['amd_name'][:44] if match['amd_name'] != '-' else '-'
+        nv_name = match['nv_name'][:44] if match['nv_name'] != '-' else '-'
+
+        lines.append(f"{idx:<4} {amd_name:<45} {nv_name:<45} {status_icon:<8}")
+
+    if not show_all and len(matches) > 20:
+        lines.append(f" ... ({len(matches) - 20} more kernel pairs)")
+
+    return lines
+
+
+def _format_kernels_grouped_by_op(matches: list[dict[str, Any]], show_all: bool) -> list[str]:
+    """Format kernels grouped by operation type."""
+    lines = []
+    lines.append("Kernel-to-Kernel Comparison (representative execution):")
+    lines.append("")
+
+    # Group matches by operation type
+    by_op: dict[str, list[tuple[int, dict[str, Any]]]] = defaultdict(list)
+    for idx, match in enumerate(matches, 1):
+        op_type = match['amd_type'] if match['amd_type'] != '-' else match['nv_type']
+        by_op[op_type].append((idx, match))
+
+    # Sort operations by first appearance
+    sorted_ops = sorted(by_op.items(), key=lambda x: x[1][0][0])
+
+    for op_type, op_matches in sorted_ops:
+        lines.append(f"── {op_type} ({len(op_matches)} kernel pairs) " + "─" * (80 - len(f"── {op_type} ({len(op_matches)} kernel pairs) ")))
+        lines.append(f"{'Pos':<4} {'AMD Kernel':<45} {'NVIDIA Kernel':<45} {'Status':<8}")
+        lines.append("-" * 110)
+
+        max_to_show = len(op_matches) if show_all else min(3, len(op_matches))
+
+        for idx, match in op_matches[:max_to_show]:
+            status_icon = {
+                'MATCH': '✓',
+                'AMD_ONLY': '⚠ AMD',
+                'NV_ONLY': '⚠ NV',
+                'MISMATCH': '✗',
+            }.get(match['status'], '?')
+
+            amd_name = match['amd_name'][:44] if match['amd_name'] != '-' else '-'
+            nv_name = match['nv_name'][:44] if match['nv_name'] != '-' else '-'
+
+            lines.append(f"{idx:<4} {amd_name:<45} {nv_name:<45} {status_icon:<8}")
+
+        if not show_all and len(op_matches) > 3:
+            lines.append(f" ... ({len(op_matches) - 3} more {op_type} pairs)")
+
+        lines.append("")
+
+    return lines
+
+
+def _group_by_pattern(
+    graph_pairs: list[dict[str, Any]],
+    platform: str
+) -> dict[tuple, list[dict[str, Any]]]:
+    """Group graph executions by their kernel sequence pattern."""
+    patterns: dict[tuple, list[dict[str, Any]]] = defaultdict(list)
+
+    kernels_key = f'{platform}_kernels'
+
+    for pair in graph_pairs:
+        kernels = pair[kernels_key]
+        sorted_kernels = sorted(kernels, key=lambda x: x.get('ts', 0))
+
+        # Pattern signature: tuple of kernel names in order
+        signature = tuple(k.get('name', '') for k in sorted_kernels)
+        patterns[signature].append(pair)
+
+    # Sort by frequency (most common first)
+    sorted_patterns = dict(sorted(
+        patterns.items(),
+        key=lambda x: len(x[1]),
+        reverse=True
+    ))
+
+    return sorted_patterns
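
A short, self-contained sketch of the grouped-by-operation view, using the same inferred `result` shape with made-up data:

from wafer_core.lib.trace_compare import format_graph_comparison_detailed

# Hypothetical input; real dicts come from the trace-compare pipeline.
result = {
    "summary": {"num_graph_pairs": 1},
    "graph_pairs": [{
        "amd_kernels": [{"name": "Cijk_gemm_example", "ts": 0, "dur": 20}],
        "nv_kernels": [{"name": "nvjet_gemm_example", "ts": 0, "dur": 18}],
        "matches": [{
            "status": "MATCH",
            "amd_type": "GEMM", "nv_type": "GEMM",
            "amd_name": "Cijk_gemm_example", "nv_name": "nvjet_gemm_example",
        }],
    }],
}
print(format_graph_comparison_detailed(result, group_by_op=True, max_graphs=1))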
wafer_core/lib/trace_compare/graph_matcher.py
@@ -0,0 +1,315 @@
+"""Deterministic kernel matching using CUDA graph execution order.
+
+This module provides 98-99% deterministic matching by leveraging the fact that
+both AMD and NVIDIA traces execute CUDA graphs in identical order, and kernels
+within each graph execute in deterministic timestamp order.
+"""
+
+from dataclasses import dataclass
+from typing import Any
+
+import orjson
+
+
+@dataclass
+class KernelMatch:
+    """A matched pair of kernels from AMD and NVIDIA traces."""
+
+    graph_index: int  # Which graph execution this belongs to (0-184)
+    position_in_graph: int  # Position within the graph (0-based)
+
+    amd_kernel: dict[str, Any]
+    nvidia_kernel: dict[str, Any]
+
+    operation_type: str  # GEMM, ATTN, RMS, etc.
+    confidence: float  # 1.0 = perfect match, <1.0 = potential fusion difference
+
+    # For debugging/validation
+    amd_correlation: int
+    nvidia_correlation: int
+
+
+@dataclass
+class GraphPair:
+    """A pair of matched CUDA graphs from AMD and NVIDIA traces."""
+
+    graph_index: int
+    amd_correlation: int
+    nvidia_correlation: int
+
+    amd_kernels: list[dict[str, Any]]
+    nvidia_kernels: list[dict[str, Any]]
+
+    is_layer: bool  # True if this is a transformer layer (>100 kernels)
+
+
+def classify_kernel(name: str) -> str:
+    """Classify kernel by operation type.
+
+    This is a coarse classification for matching purposes.
+    """
+    nl = name.lower()
+
+    if 'cijk_' in nl or 'nvjet' in nl:
+        return 'GEMM'
+    elif 'attention' in nl or 'fmha' in nl:
+        return 'ATTN'
+    elif 'reshape_and_cache' in nl:
+        return 'KV'
+    elif 'triton_per' in nl and 'rsqrt' in nl:
+        return 'RMS'
+    elif 'triton_poi' in nl and 'silu' in nl:
+        return 'SILU'
+    elif 'triton_poi' in nl:
+        return 'POI'
+    elif 'triton_red' in nl:
+        return 'RED'
+    elif 'reduce_segments' in nl:
+        return 'RSEG'
+    else:
+        return 'OTH'
+
+
+def is_platform_specific_kernel(name: str, platform: str) -> bool:
+    """Check if kernel is platform-specific and should be excluded from matching.
+
+    AMD runs reduce_segments after attention operations, but NVIDIA fuses this
+    into adjacent kernels. We need to filter these out for accurate matching.
+    """
+    if platform == "AMD":
+        return 'reduce_segments' in name.lower()
+
+    # Add NVIDIA-specific exclusions here if discovered
+    return False
+
+
+def load_graph_execution_order(trace_path: str) -> list[tuple[int, int]]:
+    """Load CUDA graph execution order from trace.
+
+    Returns:
+        List of (timestamp, correlation_id) tuples in execution order
+    """
+    with open(trace_path, "rb") as f:
+        trace = orjson.loads(f.read())
+
+    graph_launches = []
+    for event in trace.get("traceEvents", []):
+        if event.get("cat") == "cuda_runtime":
+            name = event.get("name", "")
+            if "GraphLaunch" in name or "graphLaunch" in name.lower():
+                ts = event.get("ts")
+                corr_id = event.get("args", {}).get("correlation")
+                if ts is not None and corr_id is not None:
+                    graph_launches.append((ts, corr_id))
+
+    # Sort by timestamp to get execution order
+    graph_launches.sort()
+    return graph_launches
+
+
+def load_kernels_for_correlation(trace_path: str, correlation_id: int, platform: str) -> list[dict[str, Any]]:
+    """Load all kernels for a given correlation ID in timestamp order.
+
+    Args:
+        trace_path: Path to trace JSON
+        correlation_id: Correlation ID to filter by
+        platform: "AMD" or "NVIDIA" for platform-specific filtering
+
+    Returns:
+        List of kernel events sorted by timestamp, with platform-specific kernels removed
+    """
+    with open(trace_path, "rb") as f:
+        trace = orjson.loads(f.read())
+
+    kernels = []
+    for event in trace.get("traceEvents", []):
+        if event.get("cat") == "kernel":
+            corr_id = event.get("args", {}).get("correlation")
+            if corr_id == correlation_id:
+                name = event.get("name", "")
+
+                # Skip platform-specific kernels
+                if is_platform_specific_kernel(name, platform):
+                    continue
+
+                kernels.append({
+                    "name": name,
+                    "ts": event.get("ts"),
+                    "dur": event.get("dur", 0),
+                    "correlation": corr_id,
+                    "args": event.get("args", {}),
+                })
+
+    # Sort by timestamp for deterministic ordering
+    kernels.sort(key=lambda k: k["ts"])
+    return kernels
+
+
+def match_traces(
+    amd_trace_path: str,
+    nvidia_trace_path: str
+) -> tuple[list[GraphPair], list[KernelMatch]]:
+    """Match kernels between AMD and NVIDIA traces using graph execution order.
+
+    This provides 98-99% deterministic matching by:
+    1. Matching graphs by execution order (100% deterministic)
+    2. Matching kernels by position within graphs (98-99% deterministic)
+    3. Filtering platform-specific operations (e.g., AMD's reduce_segments)
+
+    Args:
+        amd_trace_path: Path to AMD trace JSON
+        nvidia_trace_path: Path to NVIDIA trace JSON
+
+    Returns:
+        Tuple of (graph_pairs, kernel_matches)
+        - graph_pairs: List of matched CUDA graph pairs
+        - kernel_matches: List of all kernel matches across all graphs
+    """
+    # Step 1: Get graph execution order from both traces
+    amd_graphs = load_graph_execution_order(amd_trace_path)
+    nvidia_graphs = load_graph_execution_order(nvidia_trace_path)
+
+    if len(amd_graphs) != len(nvidia_graphs):
+        raise ValueError(
+            f"Graph count mismatch: AMD has {len(amd_graphs)} graphs, "
+            f"NVIDIA has {len(nvidia_graphs)} graphs. "
+            "Traces may be from different workloads."
+        )
+
+    graph_pairs = []
+    kernel_matches = []
+
+    # Step 2: Match graphs by execution order
+    for graph_idx, ((amd_ts, amd_corr), (nv_ts, nv_corr)) in enumerate(zip(amd_graphs, nvidia_graphs)):
+        # Load kernels for this correlation
+        amd_kernels = load_kernels_for_correlation(amd_trace_path, amd_corr, "AMD")
+        nvidia_kernels = load_kernels_for_correlation(nvidia_trace_path, nv_corr, "NVIDIA")
+
+        is_layer = len(amd_kernels) > 100 or len(nvidia_kernels) > 100
+
+        graph_pairs.append(GraphPair(
+            graph_index=graph_idx,
+            amd_correlation=amd_corr,
+            nvidia_correlation=nv_corr,
+            amd_kernels=amd_kernels,
+            nvidia_kernels=nvidia_kernels,
+            is_layer=is_layer,
+        ))
+
+        # Step 3: Match kernels within this graph by position
+        matches = match_kernels_in_graph(
+            graph_idx=graph_idx,
+            amd_corr=amd_corr,
+            nvidia_corr=nv_corr,
+            amd_kernels=amd_kernels,
+            nvidia_kernels=nvidia_kernels,
+        )
+        kernel_matches.extend(matches)
+
+    return graph_pairs, kernel_matches
+
+
+def match_kernels_in_graph(
+    graph_idx: int,
+    amd_corr: int,
+    nvidia_corr: int,
+    amd_kernels: list[dict[str, Any]],
+    nvidia_kernels: list[dict[str, Any]],
+) -> list[KernelMatch]:
+    """Match kernels within a single CUDA graph by position.
+
+    Args:
+        graph_idx: Index of this graph in execution order
+        amd_corr: AMD correlation ID
+        nvidia_corr: NVIDIA correlation ID
+        amd_kernels: AMD kernels (already sorted by timestamp, filtered for platform-specific ops)
+        nvidia_kernels: NVIDIA kernels (already sorted by timestamp, filtered for platform-specific ops)
+
+    Returns:
+        List of kernel matches with confidence scores
+    """
+    matches = []
+
+    # If kernel counts don't match after filtering, something unexpected happened
+    if len(amd_kernels) != len(nvidia_kernels):
+        # Handle gracefully: match what we can
+        min_len = min(len(amd_kernels), len(nvidia_kernels))
+
+        for i in range(min_len):
+            amd_k = amd_kernels[i]
+            nv_k = nvidia_kernels[i]
+
+            amd_type = classify_kernel(amd_k["name"])
+            nv_type = classify_kernel(nv_k["name"])
+
+            # Lower confidence if operation types don't match
+            confidence = 1.0 if amd_type == nv_type else 0.5
+
+            matches.append(KernelMatch(
+                graph_index=graph_idx,
+                position_in_graph=i,
+                amd_kernel=amd_k,
+                nvidia_kernel=nv_k,
+                operation_type=amd_type,
+                confidence=confidence,
+                amd_correlation=amd_corr,
+                nvidia_correlation=nvidia_corr,
+            ))
+
+        # Note: Unmatched kernels are implicitly dropped
+        # Could add logging here for debugging
+    else:
+        # Perfect length match - match by position
+        for i, (amd_k, nv_k) in enumerate(zip(amd_kernels, nvidia_kernels)):
+            amd_type = classify_kernel(amd_k["name"])
+            nv_type = classify_kernel(nv_k["name"])
+
+            # Confidence = 1.0 if operation types match, else 0.8
+            # (0.8 because position-based matching is still very reliable)
+            confidence = 1.0 if amd_type == nv_type else 0.8
+
+            matches.append(KernelMatch(
+                graph_index=graph_idx,
+                position_in_graph=i,
+                amd_kernel=amd_k,
+                nvidia_kernel=nv_k,
+                operation_type=amd_type if amd_type == nv_type else f"{amd_type}→{nv_type}",
+                confidence=confidence,
+                amd_correlation=amd_corr,
+                nvidia_correlation=nvidia_corr,
+            ))
+
+    return matches
+
+
+def get_matching_statistics(kernel_matches: list[KernelMatch]) -> dict[str, Any]:
+    """Calculate statistics about matching quality.
+
+    Returns:
+        Dict with:
+        - total_matches: Total kernel pairs matched
+        - perfect_matches: Matches with confidence=1.0
+        - fuzzy_matches: Matches with confidence<1.0
+        - match_rate: Percentage of perfect matches
+        - by_operation: Breakdown by operation type
+    """
+    total = len(kernel_matches)
+    perfect = sum(1 for m in kernel_matches if m.confidence == 1.0)
+
+    # Breakdown by operation type
+    from collections import defaultdict
+    by_operation = defaultdict(lambda: {"total": 0, "perfect": 0})
+
+    for match in kernel_matches:
+        op = match.operation_type
+        by_operation[op]["total"] += 1
+        if match.confidence == 1.0:
+            by_operation[op]["perfect"] += 1
+
+    return {
+        "total_matches": total,
+        "perfect_matches": perfect,
+        "fuzzy_matches": total - perfect,
+        "match_rate": perfect / total if total > 0 else 0.0,
+        "by_operation": dict(by_operation),
+    }
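
A usage sketch for this module (hypothetical trace paths; get_matching_statistics is not re-exported from __init__.py, so it is imported from the module directly):

from wafer_core.lib.trace_compare.graph_matcher import (
    get_matching_statistics,
    match_traces,
)

# Both traces must contain the same number of cuda_runtime GraphLaunch events,
# otherwise match_traces raises ValueError.
graph_pairs, kernel_matches = match_traces("amd_trace.json", "nvidia_trace.json")
stats = get_matching_statistics(kernel_matches)
print(f"{stats['perfect_matches']}/{stats['total_matches']} perfect matches "
      f"({stats['match_rate']:.1%})")
for op, counts in stats["by_operation"].items():
    print(f"  {op}: {counts['perfect']}/{counts['total']}")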
wafer_core/lib/trace_compare/graph_matcher_v2.py
@@ -0,0 +1,332 @@
+"""Improved kernel matching that preserves fusion information.
+
+Key improvements over v1:
+1. Uses existing classifier.py instead of reimplementing
+2. Marks fusion differences instead of filtering them out
+3. Provides detailed fusion analysis
+4. Handles sequence alignment when platforms have different kernel counts
+"""
+
+from dataclasses import dataclass, field
+from typing import Any
+
+import orjson
+
+from .classifier import classify
+
+
+@dataclass
+class FusionDifference:
+    """A fusion difference between platforms."""
+
+    platform_with_kernel: str  # "AMD" or "NVIDIA"
+    kernel_name: str
+    operation_type: str
+    position: int
+    likely_fused_into: str | None = None  # Best guess of where this work went
+
+
+@dataclass
+class KernelMatch:
+    """A matched pair of kernels, or a fusion difference."""
+
+    graph_index: int
+    amd_position: int | None  # None if this is NVIDIA-only
+    nvidia_position: int | None  # None if this is AMD-only
+
+    amd_kernel: dict[str, Any] | None
+    nvidia_kernel: dict[str, Any] | None
+
+    operation_type: str
+    confidence: float  # 1.0 = perfect, 0.5 = fusion difference
+
+    # If this is a fusion difference
+    is_fusion_difference: bool = False
+    fusion_info: FusionDifference | None = None
+
+    amd_correlation: int | None = None
+    nvidia_correlation: int | None = None
+
+
+@dataclass
+class GraphPair:
+    """Matched CUDA graph pair with fusion analysis."""
+
+    graph_index: int
+    amd_correlation: int
+    nvidia_correlation: int
+
+    amd_kernels: list[dict[str, Any]]
+    nvidia_kernels: list[dict[str, Any]]
+
+    is_layer: bool
+    fusion_differences: list[FusionDifference] = field(default_factory=list)
+
+
+def load_graph_execution_order(trace_path: str) -> list[tuple[int, int]]:
+    """Load CUDA graph execution order."""
+    with open(trace_path, "rb") as f:
+        trace = orjson.loads(f.read())
+
+    graph_launches = []
+    for event in trace.get("traceEvents", []):
+        if event.get("cat") == "cuda_runtime":
+            name = event.get("name", "")
+            if "GraphLaunch" in name or "graphLaunch" in name.lower():
+                ts = event.get("ts")
+                corr_id = event.get("args", {}).get("correlation")
+                if ts is not None and corr_id is not None:
+                    graph_launches.append((ts, corr_id))
+
+    graph_launches.sort()
+    return graph_launches
+
+
+def load_kernels_for_correlation(
+    trace_path: str, correlation_id: int, platform: str
+) -> list[dict[str, Any]]:
+    """Load all kernels for a correlation, keeping ALL kernels including fusion differences."""
+    with open(trace_path, "rb") as f:
+        trace = orjson.loads(f.read())
+
+    kernels = []
+    for event in trace.get("traceEvents", []):
+        if event.get("cat") == "kernel":
+            corr_id = event.get("args", {}).get("correlation")
+            if corr_id == correlation_id:
+                kernels.append({
+                    "name": event.get("name", ""),
+                    "ts": event.get("ts"),
+                    "dur": event.get("dur", 0),
+                    "correlation": corr_id,
+                    "args": event.get("args", {}),
+                })
+
+    kernels.sort(key=lambda k: k["ts"])
+    return kernels
+
+
+def align_sequences_with_fusion(
+    amd_kernels: list[dict[str, Any]],
+    nvidia_kernels: list[dict[str, Any]],
+    platform_amd: str = "AMD",
+    platform_nvidia: str = "NVIDIA",
+) -> list[tuple[int | None, int | None, str]]:
+    """Align two kernel sequences, identifying fusion differences.
+
+    Returns:
+        List of (amd_index, nvidia_index, alignment_type) where:
+        - alignment_type is "match", "amd_only", or "nvidia_only"
+    """
+    # Classify all kernels
+    amd_ops = [classify(k["name"], platform_amd)[0].value for k in amd_kernels]
+    nvidia_ops = [classify(k["name"], platform_nvidia)[0].value for k in nvidia_kernels]
+
+    # Use simple sequence alignment
+    # For now, handle the common case: AMD has extra "reduce_segments" operations
+    alignments = []
+    amd_i = 0
+    nv_i = 0
+
+    while amd_i < len(amd_ops) or nv_i < len(nvidia_ops):
+        if amd_i >= len(amd_ops):
+            # Remaining NVIDIA ops
+            alignments.append((None, nv_i, "nvidia_only"))
+            nv_i += 1
+        elif nv_i >= len(nvidia_ops):
+            # Remaining AMD ops
+            alignments.append((amd_i, None, "amd_only"))
+            amd_i += 1
+        elif amd_ops[amd_i] == nvidia_ops[nv_i]:
+            # Match
+            alignments.append((amd_i, nv_i, "match"))
+            amd_i += 1
+            nv_i += 1
+        else:
+            # Mismatch - check if AMD has an extra operation
+            amd_kernel_name = amd_kernels[amd_i]["name"].lower()
+
+            # Known fusion differences
+            if "reduce_segments" in amd_kernel_name:
+                # AMD has reduce_segments, NVIDIA fuses it
+                alignments.append((amd_i, None, "amd_only"))
+                amd_i += 1
+            else:
+                # Unknown mismatch - try to match anyway
+                alignments.append((amd_i, nv_i, "match"))
+                amd_i += 1
+                nv_i += 1
+
+    return alignments
+
+
+def match_traces(
+    amd_trace_path: str,
+    nvidia_trace_path: str,
+) -> tuple[list[GraphPair], list[KernelMatch]]:
+    """Match traces with fusion difference detection."""
+    amd_graphs = load_graph_execution_order(amd_trace_path)
+    nvidia_graphs = load_graph_execution_order(nvidia_trace_path)
+
+    if len(amd_graphs) != len(nvidia_graphs):
+        raise ValueError(
+            f"Graph count mismatch: AMD={len(amd_graphs)}, NVIDIA={len(nvidia_graphs)}"
+        )
+
+    # Detect platform from first kernel
+    with open(amd_trace_path, "rb") as f:
+        amd_trace = orjson.loads(f.read())
+    props = amd_trace.get("deviceProperties", [{}])[0]
+    platform_amd = "AMD" if amd_trace.get("roctracer_version") or props.get("warpSize") == 64 else "NVIDIA"
+
+    with open(nvidia_trace_path, "rb") as f:
+        nvidia_trace = orjson.loads(f.read())
+    props = nvidia_trace.get("deviceProperties", [{}])[0]
+    platform_nvidia = "AMD" if nvidia_trace.get("roctracer_version") or props.get("warpSize") == 64 else "NVIDIA"
+
+    graph_pairs = []
+    kernel_matches = []
+
+    for graph_idx, ((amd_ts, amd_corr), (nv_ts, nv_corr)) in enumerate(
+        zip(amd_graphs, nvidia_graphs)
+    ):
+        amd_kernels = load_kernels_for_correlation(amd_trace_path, amd_corr, platform_amd)
+        nvidia_kernels = load_kernels_for_correlation(
+            nvidia_trace_path, nv_corr, platform_nvidia
+        )
+
+        is_layer = len(amd_kernels) > 100 or len(nvidia_kernels) > 100
+
+        # Align sequences
+        alignments = align_sequences_with_fusion(
+            amd_kernels, nvidia_kernels, platform_amd, platform_nvidia
+        )
+
+        # Create matches and track fusion differences
+        fusion_diffs = []
+
+        for amd_i, nv_i, align_type in alignments:
+            if align_type == "match":
+                # Perfect match
+                amd_k = amd_kernels[amd_i]
+                nv_k = nvidia_kernels[nv_i]
+
+                amd_op, _ = classify(amd_k["name"], platform_amd)
+                nv_op, _ = classify(nv_k["name"], platform_nvidia)
+
+                confidence = 1.0 if amd_op == nv_op else 0.8
+
+                kernel_matches.append(
+                    KernelMatch(
+                        graph_index=graph_idx,
+                        amd_position=amd_i,
+                        nvidia_position=nv_i,
+                        amd_kernel=amd_k,
+                        nvidia_kernel=nv_k,
+                        operation_type=amd_op.value,
+                        confidence=confidence,
+                        amd_correlation=amd_corr,
+                        nvidia_correlation=nv_corr,
+                    )
+                )
+            elif align_type == "amd_only":
+                # Fusion difference: AMD has this, NVIDIA fused it
+                amd_k = amd_kernels[amd_i]
+                amd_op, _ = classify(amd_k["name"], platform_amd)
+
+                fusion_diff = FusionDifference(
+                    platform_with_kernel="AMD",
+                    kernel_name=amd_k["name"],
+                    operation_type=amd_op.value,
+                    position=amd_i,
+                    likely_fused_into="adjacent operation (NVIDIA fuses)",
+                )
+                fusion_diffs.append(fusion_diff)
+
+                kernel_matches.append(
+                    KernelMatch(
+                        graph_index=graph_idx,
+                        amd_position=amd_i,
+                        nvidia_position=None,
+                        amd_kernel=amd_k,
+                        nvidia_kernel=None,
+                        operation_type=amd_op.value,
+                        confidence=0.5,
+                        is_fusion_difference=True,
+                        fusion_info=fusion_diff,
+                        amd_correlation=amd_corr,
+                        nvidia_correlation=nv_corr,
+                    )
+                )
+            elif align_type == "nvidia_only":
+                # Fusion difference: NVIDIA has this, AMD fused it
+                nv_k = nvidia_kernels[nv_i]
+                nv_op, _ = classify(nv_k["name"], platform_nvidia)
+
+                fusion_diff = FusionDifference(
+                    platform_with_kernel="NVIDIA",
+                    kernel_name=nv_k["name"],
+                    operation_type=nv_op.value,
+                    position=nv_i,
+                    likely_fused_into="adjacent operation (AMD fuses)",
+                )
+                fusion_diffs.append(fusion_diff)
+
+                kernel_matches.append(
+                    KernelMatch(
+                        graph_index=graph_idx,
+                        amd_position=None,
+                        nvidia_position=nv_i,
+                        amd_kernel=None,
+                        nvidia_kernel=nv_k,
+                        operation_type=nv_op.value,
+                        confidence=0.5,
+                        is_fusion_difference=True,
+                        fusion_info=fusion_diff,
+                        amd_correlation=amd_corr,
+                        nvidia_correlation=nv_corr,
+                    )
+                )
+
+        graph_pairs.append(
+            GraphPair(
+                graph_index=graph_idx,
+                amd_correlation=amd_corr,
+                nvidia_correlation=nv_corr,
+                amd_kernels=amd_kernels,
+                nvidia_kernels=nvidia_kernels,
+                is_layer=is_layer,
+                fusion_differences=fusion_diffs,
+            )
+        )
+
+    return graph_pairs, kernel_matches
+
+
+def get_matching_statistics(kernel_matches: list[KernelMatch]) -> dict[str, Any]:
+    """Calculate statistics including fusion analysis."""
+    total = len(kernel_matches)
+    perfect = sum(1 for m in kernel_matches if m.confidence == 1.0)
+    fusion_diffs = sum(1 for m in kernel_matches if m.is_fusion_difference)
+
+    # Breakdown by operation type
+    from collections import defaultdict
+
+    by_operation = defaultdict(lambda: {"total": 0, "perfect": 0, "fusion": 0})
+
+    for match in kernel_matches:
+        op = match.operation_type
+        by_operation[op]["total"] += 1
+        if match.confidence == 1.0:
+            by_operation[op]["perfect"] += 1
+        if match.is_fusion_difference:
+            by_operation[op]["fusion"] += 1
+
+    return {
+        "total_matches": total,
+        "perfect_matches": perfect,
+        "fuzzy_matches": total - perfect - fusion_diffs,
+        "fusion_differences": fusion_diffs,
+        "match_rate": perfect / total if total > 0 else 0.0,
+        "by_operation": dict(by_operation),
+    }
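
A sketch of how the v2 matcher's fusion information might be consumed (hypothetical paths; the module is not re-exported from __init__.py in this release):

from wafer_core.lib.trace_compare import graph_matcher_v2 as gm2

graph_pairs, kernel_matches = gm2.match_traces("amd_trace.json", "nvidia_trace.json")
for pair in graph_pairs:
    for fd in pair.fusion_differences:
        # Each FusionDifference records which platform ran the extra kernel
        # and where in the graph it appeared.
        print(f"graph {pair.graph_index}: {fd.platform_with_kernel}-only "
              f"{fd.operation_type} at position {fd.position}: {fd.kernel_name}")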
{wafer_core-0.1.44.dist-info → wafer_core-0.1.46.dist-info}/RECORD
@@ -319,7 +319,7 @@ wafer_core/lib/rocprofiler/systems/run/profiler.py,sha256=aiQLsDnfQHSeCM5zLnO4Vl
 wafer_core/lib/rocprofiler/systems/sample/__init__.py,sha256=31rNmLPQ7OVhvlOEEOwPKgk8_qrCidj6AmzDXexQJ_o,288
 wafer_core/lib/rocprofiler/systems/sample/profiler.py,sha256=CYZPTzNXd48LoCfmY6h_5RSYEdWYccuv3-t4YncHJLE,7384
 wafer_core/lib/trace_compare/PERFORMANCE.md,sha256=jkJh7ApZi8H7NKTcz8v0LNtwSFtIUqY88e3QbL749ww,3823
-wafer_core/lib/trace_compare/__init__.py,sha256=
+wafer_core/lib/trace_compare/__init__.py,sha256=Xi_0mG2jwhow0VDuQxWtKshEKLa98PoRbu9CClc7V7k,1388
 wafer_core/lib/trace_compare/aligner.py,sha256=1S8Ob3RaEsIjN0HdqEx0yGsW5uf_lMrJVSH_MnZhKok,13788
 wafer_core/lib/trace_compare/analyzer.py,sha256=o0SI1PsehpgxeUPQEB9708W_Q_ILiO5apgqVLe2xE8A,14541
 wafer_core/lib/trace_compare/api.py,sha256=JSRTcd7eZK1Z8l18TFEiA5A8ENJS1TMz7oIiw1KBbAs,8796
@@ -327,6 +327,10 @@ wafer_core/lib/trace_compare/architecture.py,sha256=8bqlAJQeJLBHblyXvFV-w55PIKiV
 wafer_core/lib/trace_compare/classifier.py,sha256=sE1K007GVk_Up2g59SVUIZ7BThf0yHNjGsZ9AyM_Ah8,6028
 wafer_core/lib/trace_compare/formatter.py,sha256=GNrCZ45ueBN05CEXjOtTuKvTI8z-g-ZZFil-ni3sWVY,37962
 wafer_core/lib/trace_compare/fusion_analyzer.py,sha256=xmVEF9qeroMa-ONfpnn64_q-aLyAjZ-9EIAYdVpIHKI,38555
+wafer_core/lib/trace_compare/graph_formatter.py,sha256=SA5hsA-QOEAIeV6pJGMA1HnNFPhBqH3K1on7pRHrxLM,9891
+wafer_core/lib/trace_compare/graph_formatter_detailed.py,sha256=rguc9UsgjFFfDP3VXZzAUMsXOnBEbV03Ug6D1-0Ktxs,8956
+wafer_core/lib/trace_compare/graph_matcher.py,sha256=n8vB72zIBp2l24jOP-nj3Lg-UnphPBdJl6fBc5QX4XA,10633
+wafer_core/lib/trace_compare/graph_matcher_v2.py,sha256=NUcdnGsphKyutP_1n1o_2uUG6NDBDj9VyIcbPJlzdOU,11696
 wafer_core/lib/trace_compare/kernel_registry.yaml,sha256=0-knXwsF3pR1x1JdIz-aWaH-5xDgTylh53E47Kf6nHo,9808
 wafer_core/lib/trace_compare/layer_segmentation.py,sha256=kI_Y1e9nrKZfdwfcrGo4h7gpMxqXI_xkgXk46zuFen4,4642
 wafer_core/lib/trace_compare/loader.py,sha256=z3gO7CV8AxZloWUCA0aA3pwkNiEnEobdLQBAII41cGY,16129
@@ -723,6 +727,6 @@ wafer_core/utils/modal_execution/modal_app.py,sha256=VfS2cX8gHtnlPXemmMcEwDPeQdh
 wafer_core/utils/modal_execution/modal_config.py,sha256=7cGX9TGqilQ3qxI3OFGXV5orjtyRU-PEDOJ4vP2oxno,4421
 wafer_core/utils/modal_execution/modal_execution.py,sha256=gChjnV6jqA3A7IRP3DfvV5cSfm_MN0X4f7JZufXgdZE,24594
 wafer_core/utils/modal_execution/test_modal.py,sha256=_jqou_hrLs1Daf1590Pnb0a_lXMMa2rczAPpW9HpoNQ,8153
-wafer_core-0.1.
-wafer_core-0.1.
-wafer_core-0.1.
+wafer_core-0.1.46.dist-info/METADATA,sha256=rgFOq_IA8Z0JLFzshlHqGDDZPo50owAGlqWuFhiu_HY,1477
+wafer_core-0.1.46.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+wafer_core-0.1.46.dist-info/RECORD,,
File without changes