wafer-core 0.1.26__py3-none-any.whl → 0.1.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,119 @@
1
+ """Same kernel analysis - comparing identical kernel names across platforms.
2
+
3
+ Identifies kernels where AMD and NVIDIA use the same kernel name/pattern
4
+ and compares their performance directly.
5
+ """
6
+
7
+ from collections import defaultdict
8
+ from dataclasses import dataclass, field
9
+ from typing import Any
10
+
11
+ from .aligner import KernelPair, LayerAlignment
12
+
13
+
14
@dataclass
class SameKernelComparison:
    """One kernel that appears under the same name on both platforms.

    This is a plain value record: it stores the figures it is handed and
    computes nothing itself. Field order is part of the interface because
    the generated ``__init__`` accepts the fields positionally in
    declaration order.
    """

    # What was matched, and in which layer.
    layer: int
    kernel_name: str
    operation: str

    # Per-platform timing figures.
    # NOTE(review): the ``_us`` suffix suggests microseconds -- confirm
    # against whatever produces these values.
    amd_avg_us: float
    nvidia_avg_us: float

    # Comparison figures supplied by the caller; they are not recomputed
    # from the timings above.
    ratio: float
    gap_us: float

    # NOTE(review): presumably per-platform invocation counts -- confirm.
    amd_count: int
    nvidia_count: int
27
+
28
+
29
@dataclass
class SameKernelAnalysis:
    """Container for the outcome of a same-kernel analysis run.

    Both fields default to fresh empty containers (via ``default_factory``),
    so separate instances never share a list or dict.
    """

    # One entry per matched kernel pair.
    kernels: list[SameKernelComparison] = field(default_factory=list)
    # Aggregate statistics keyed by name.
    summary: dict[str, Any] = field(default_factory=dict)
35
+
36
+
37
def analyze_same_kernels(
    layer_alignments: list[LayerAlignment],
) -> SameKernelAnalysis:
    """Collect the kernel pairs flagged as identical and summarise them.

    Args:
        layer_alignments: Aligned layers; each one's ``kernel_pairs`` is
            scanned in order.

    Returns:
        SameKernelAnalysis holding one SameKernelComparison per qualifying
        pair, plus a summary dict with the keys ``total_same_kernels``,
        ``avg_ratio``, ``kernels_where_amd_faster`` and
        ``kernels_where_nvidia_faster``.
    """
    matched: list[SameKernelComparison] = []

    for alignment in layer_alignments:
        for candidate in alignment.kernel_pairs:
            # Only pairs flagged as the same kernel qualify ...
            if not candidate.is_same_kernel:
                continue
            # ... and only when both platforms actually report a kernel name.
            if not (candidate.amd_kernel and candidate.nvidia_kernel):
                continue

            comparison = SameKernelComparison(
                layer=alignment.layer,
                # The AMD-side name is used as the record's kernel name.
                kernel_name=candidate.amd_kernel,
                operation=candidate.operation,
                amd_avg_us=candidate.amd_avg_us,
                nvidia_avg_us=candidate.nvidia_avg_us,
                ratio=candidate.ratio,
                gap_us=candidate.gap_us,
                amd_count=candidate.amd_count,
                nvidia_count=candidate.nvidia_count,
            )
            matched.append(comparison)

    # Infinite ratios are left out of the mean; with nothing left to average
    # the mean falls back to the neutral value 1.0. This also covers the case
    # where no pairs matched at all.
    finite_ratios = [entry.ratio for entry in matched if entry.ratio != float("inf")]
    if finite_ratios:
        mean_ratio = sum(finite_ratios) / len(finite_ratios)
    else:
        mean_ratio = 1.0

    # A ratio below 1.0 is counted for AMD, above 1.0 for NVIDIA; a ratio of
    # exactly 1.0 is counted for neither.
    amd_wins = sum(1 for entry in matched if entry.ratio < 1.0)
    nvidia_wins = sum(1 for entry in matched if entry.ratio > 1.0)

    stats = {
        "total_same_kernels": len(matched),
        "avg_ratio": mean_ratio,
        "kernels_where_amd_faster": amd_wins,
        "kernels_where_nvidia_faster": nvidia_wins,
    }
    return SameKernelAnalysis(kernels=matched, summary=stats)
86
+
87
+
88
def analyze_same_kernels_from_alignment(
    layer_alignments: list[LayerAlignment],
) -> dict[str, Any]:
    """Run the same-kernel analysis and return it as plain dictionaries.

    Thin adapter over ``analyze_same_kernels`` that flattens each comparison
    record into a dict, for callers that want dictionaries rather than
    dataclass instances.

    Args:
        layer_alignments: Aligned layers, passed straight through to
            ``analyze_same_kernels``.

    Returns:
        Dict with ``"kernels"`` (a list of per-kernel dicts) and
        ``"summary"`` (the analysis summary dict, passed through as-is and
        not copied).
    """
    # Attribute names to export, in the order the keys appear in each dict.
    exported_fields = (
        "layer",
        "kernel_name",
        "operation",
        "amd_avg_us",
        "nvidia_avg_us",
        "ratio",
        "gap_us",
        "amd_count",
        "nvidia_count",
    )

    result = analyze_same_kernels(layer_alignments)

    rows = []
    for comparison in result.kernels:
        rows.append({name: getattr(comparison, name) for name in exported_fields})

    return {"kernels": rows, "summary": result.summary}
@@ -0,0 +1,99 @@
1
+ """Warning detection and reporting for trace analysis.
2
+
3
+ Detects issues with trace data quality and provides actionable suggestions.
4
+ """
5
+
6
+ from dataclasses import dataclass
7
+ from typing import Literal
8
+
9
+
10
@dataclass(frozen=True)
class TraceWarning:
    """Immutable record describing one trace-quality or analysis-limitation issue.

    Declared ``frozen=True``, so instances cannot be modified after creation.
    """

    # Short identifier for the kind of issue,
    # e.g. "NO_PHASE_ANNOTATIONS" or "NO_PYTHON_STACKS".
    code: str
    # How serious the issue is; restricted to these three literals by the type.
    severity: Literal["info", "warning", "error"]
    # Description of what was detected.
    message: str
    # What the user can do about it.
    suggestion: str
18
+
19
+
20
def detect_warnings(
    events: list[dict],
    kernel_names: list[str],
    phases: list[dict] | None = None,
    layers_detected: int = 0,
    total_kernels: int = 0,
) -> list[TraceWarning]:
    """Detect data-quality warnings for a trace.

    Four independent checks run in a fixed order; each appends at most one
    warning:

    1. ``NO_PHASE_ANNOTATIONS`` - no event has ``cat == "user_annotation"``
       and a name starting with ``"execute_context"``.
    2. ``NO_PYTHON_STACKS`` - no event has ``cat == "python_function"``.
    3. ``HIGH_UNKNOWN_KERNELS`` - more than 20% of ``total_kernels`` have a
       name containing "unknown" (case-insensitive) or equal to ``"Other"``.
    4. ``LAYER_DETECTION_FAILED`` - ``layers_detected`` is 0 while
       ``total_kernels`` exceeds 100.

    Args:
        events: All trace events. Only the ``"cat"`` and ``"name"`` keys are
            read. An event whose ``"name"`` is missing, ``None`` or not a
            string is treated as not being a phase marker.
        kernel_names: List of all kernel names, scanned for unknown kernels.
        phases: Optional list of phase events. Accepted for API
            compatibility; no check in this function reads it.
        layers_detected: Number of layers detected.
        total_kernels: Total number of kernels. Used as the denominator of
            the unknown-kernel percentage; when it is 0 that check is skipped.

    Returns:
        List of warnings, in the check order listed above (empty when no
        check fires).
    """
    warnings: list[TraceWarning] = []

    # Scan the events once for both markers and stop as soon as both are
    # found, so a large trace is never walked twice.
    has_phase_annotations = False
    has_python_stacks = False
    for ev in events:
        category = ev.get("cat")
        if category == "python_function":
            has_python_stacks = True
        elif category == "user_annotation" and not has_phase_annotations:
            name = ev.get("name")
            # dict.get's default only applies to a *missing* key. A key that
            # is present with a None or non-string value must not crash here.
            has_phase_annotations = isinstance(name, str) and name.startswith(
                "execute_context"
            )
        if has_phase_annotations and has_python_stacks:
            break

    if not has_phase_annotations:
        warnings.append(
            TraceWarning(
                code="NO_PHASE_ANNOTATIONS",
                severity="warning",
                message="No vLLM phase annotations found. Phase analysis (prefill/decode) will be unavailable.",
                suggestion="Ensure you're using vLLM v1.0+ with profiling enabled. Re-profile with torch.profiler.profile() to capture phase markers.",
            )
        )

    if not has_python_stacks:
        warnings.append(
            TraceWarning(
                code="NO_PYTHON_STACKS",
                severity="info",
                message="No Python stack traces available. CPU→kernel mapping will be limited.",
                suggestion="Re-profile with with_stack=True: torch.profiler.profile(with_stack=True) for better CPU operator identification.",
            )
        )

    # Check for a high percentage of unknown kernels. Guarded on
    # total_kernels to avoid dividing by zero.
    if total_kernels > 0:
        unknown_count = sum(
            1 for name in kernel_names if "unknown" in name.lower() or name == "Other"
        )
        unknown_percentage = (unknown_count / total_kernels) * 100

        if unknown_percentage > 20:
            warnings.append(
                TraceWarning(
                    code="HIGH_UNKNOWN_KERNELS",
                    severity="warning",
                    message=f"{unknown_percentage:.1f}% of kernels are classified as 'Unknown'. Kernel registry may be outdated.",
                    suggestion="Update kernel pattern registry or report unrecognized kernel patterns for support.",
                )
            )

    # Check for layer detection failure. Only reported when total_kernels
    # exceeds 100; smaller traces are not flagged.
    if layers_detected == 0 and total_kernels > 100:
        warnings.append(
            TraceWarning(
                code="LAYER_DETECTION_FAILED",
                severity="warning",
                message="No transformer layers detected. Layer-wise analysis unavailable.",
                suggestion="This may indicate a non-transformer model (e.g., SSM/Mamba) or insufficient correlation data. Check model architecture.",
            )
        )

    return warnings
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wafer-core
3
- Version: 0.1.26
3
+ Version: 0.1.27
4
4
  Summary: Core utilities and environments for Wafer GPU kernel optimization
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: aiohttp>=3.9.0
@@ -15,12 +15,14 @@ Requires-Dist: dash-svg>=0.0.11
15
15
  Requires-Dist: dash>=3.0.0
16
16
  Requires-Dist: dnspython>=2.8.0
17
17
  Requires-Dist: httpx>=0.25.0
18
+ Requires-Dist: ijson>=3.2.0
18
19
  Requires-Dist: kaleido==0.2.1
19
20
  Requires-Dist: markdownify>=0.11.0
20
21
  Requires-Dist: matplotlib>=3.0.0
21
22
  Requires-Dist: modal>=0.64.0
22
23
  Requires-Dist: numpy>=1.17.5
23
24
  Requires-Dist: openai>=1.0.0
25
+ Requires-Dist: orjson>=3.9.0
24
26
  Requires-Dist: pandas~=3.0.0
25
27
  Requires-Dist: paramiko>=3.0.0
26
28
  Requires-Dist: paramiko>=3.4.0
@@ -318,12 +318,20 @@ wafer_core/lib/rocprofiler/systems/run/analyzer.py,sha256=Qg3M8-kCKdV82ehn6Ta20N
318
318
  wafer_core/lib/rocprofiler/systems/run/profiler.py,sha256=aiQLsDnfQHSeCM5zLnO4VlbTmREYnAtiuT50Eq6uWfg,8387
319
319
  wafer_core/lib/rocprofiler/systems/sample/__init__.py,sha256=31rNmLPQ7OVhvlOEEOwPKgk8_qrCidj6AmzDXexQJ_o,288
320
320
  wafer_core/lib/rocprofiler/systems/sample/profiler.py,sha256=CYZPTzNXd48LoCfmY6h_5RSYEdWYccuv3-t4YncHJLE,7384
321
- wafer_core/lib/trace_compare/__init__.py,sha256=G5vmiQnuweiF9vjK1FC4ZIy-tzuHiaLMs7QBnir8OJw,800
322
- wafer_core/lib/trace_compare/analyzer.py,sha256=o0SI1PsehpgxeUPQEB9708W_Q_ILiO5apgqVLe2xE8A,14541
323
- wafer_core/lib/trace_compare/classifier.py,sha256=sE1K007GVk_Up2g59SVUIZ7BThf0yHNjGsZ9AyM_Ah8,6028
321
+ wafer_core/lib/trace_compare/PERFORMANCE.md,sha256=jkJh7ApZi8H7NKTcz8v0LNtwSFtIUqY88e3QbL749ww,3823
322
+ wafer_core/lib/trace_compare/__init__.py,sha256=CyUPbPQDYhVLCFFA7S_jNSilG3OgqYjmHSKfR5X11go,1377
323
+ wafer_core/lib/trace_compare/aligner.py,sha256=6HplOHCUIb0cMXA-Lu-91T-hKVTMK4bk8Ei-v7HE1G4,13471
324
+ wafer_core/lib/trace_compare/analyzer.py,sha256=m-waAiU5S72M9J4kUwIy9fPWUecg_oOUczri8Na6xUY,29360
325
+ wafer_core/lib/trace_compare/api.py,sha256=JSRTcd7eZK1Z8l18TFEiA5A8ENJS1TMz7oIiw1KBbAs,8796
326
+ wafer_core/lib/trace_compare/architecture.py,sha256=8bqlAJQeJLBHblyXvFV-w55PIKiVQDPjDQZ8Jx4tuGg,2110
327
+ wafer_core/lib/trace_compare/classifier.py,sha256=CDGzY9TY-I5wRuEGsu4mTCdljqVTOnLWyFLyNgmkGXI,16864
324
328
  wafer_core/lib/trace_compare/formatter.py,sha256=GNrCZ45ueBN05CEXjOtTuKvTI8z-g-ZZFil-ni3sWVY,37962
325
- wafer_core/lib/trace_compare/fusion_analyzer.py,sha256=LwYTBjL_gHCvydfgFp-L9f_qfXq3GenJHRemygly4H8,36482
326
- wafer_core/lib/trace_compare/loader.py,sha256=E7-OS4uMqvJhGLyxKQNnAgK33YECrSjuCssUT_X0LQA,11728
329
+ wafer_core/lib/trace_compare/fusion_analyzer.py,sha256=bD_CJ3JoVg_N6vxJJULd6G8l_-O5qnLuXKDEDItcQtg,15489
330
+ wafer_core/lib/trace_compare/kernel_registry.yaml,sha256=0-knXwsF3pR1x1JdIz-aWaH-5xDgTylh53E47Kf6nHo,9808
331
+ wafer_core/lib/trace_compare/layer_segmentation.py,sha256=kI_Y1e9nrKZfdwfcrGo4h7gpMxqXI_xkgXk46zuFen4,4642
332
+ wafer_core/lib/trace_compare/loader.py,sha256=zBHI0r7CX_wJ2mz0_-s0lm9KGSdaVaq7OKyxUL6KIlw,23997
333
+ wafer_core/lib/trace_compare/same_kernel_analyzer.py,sha256=sp81NJGVJeYdAfRQRgMbB5HcGTOneF1Rau3rbLPfpv4,3489
334
+ wafer_core/lib/trace_compare/warnings.py,sha256=B1HxFt-v1mDqLT2aD5bSm1Yn88bfPYnM-wui0WBF3xM,3548
327
335
  wafer_core/lib/tracelens/__init__.py,sha256=AkHdmOnKlBO4RpsAqVVGe7MOfv6E6uhEaC_iKrYeMPI,2002
328
336
  wafer_core/lib/tracelens/comparator.py,sha256=71YEPfjBi7_24u1oQuPerNtFsN0sDQ5CT_uBi0XLllw,3460
329
337
  wafer_core/lib/tracelens/finder.py,sha256=HpbN8TuRNbbBytPYOmkBkfsFVBReQqVgsvFX-mBrln4,2459
@@ -679,6 +687,6 @@ wafer_core/utils/modal_execution/modal_app.py,sha256=VfS2cX8gHtnlPXemmMcEwDPeQdh
679
687
  wafer_core/utils/modal_execution/modal_config.py,sha256=7cGX9TGqilQ3qxI3OFGXV5orjtyRU-PEDOJ4vP2oxno,4421
680
688
  wafer_core/utils/modal_execution/modal_execution.py,sha256=gChjnV6jqA3A7IRP3DfvV5cSfm_MN0X4f7JZufXgdZE,24594
681
689
  wafer_core/utils/modal_execution/test_modal.py,sha256=_jqou_hrLs1Daf1590Pnb0a_lXMMa2rczAPpW9HpoNQ,8153
682
- wafer_core-0.1.26.dist-info/METADATA,sha256=xzTIIcsmbJkA06hTdoRb4uXZj2ud1-wnV7EXdLOSOe4,1420
683
- wafer_core-0.1.26.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
684
- wafer_core-0.1.26.dist-info/RECORD,,
690
+ wafer_core-0.1.27.dist-info/METADATA,sha256=NYiI9hCaVd9RCCAfd8Ys0UwTMju6wiyaT7nsk4gsB8A,1477
691
+ wafer_core-0.1.27.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
692
+ wafer_core-0.1.27.dist-info/RECORD,,