wafer-core 0.1.29-py3-none-any.whl → 0.1.30-py3-none-any.whl

wafer_core/lib/trace_compare/analyzer.py

@@ -145,7 +145,17 @@ def analyze_traces_from_loaded(
     trace2_total = trace2_agg["total_us"] / 1000
     trace1_count = int(trace1_agg["count"])
     trace2_count = int(trace2_agg["count"])
-    ratio = trace1_avg / trace2_avg if trace2_avg > 0 else 1
+    # Speedup: ratio of total times (not per-call averages)
+    # Shows how many times faster/slower trace1 is compared to trace2
+    # > 1.0 means trace1 is slower, < 1.0 means trace1 is faster
+    # Using total time instead of avg time per call because operations may have
+    # vastly different call counts (e.g., fused vs unfused operations)
+    if trace2_total > 0:
+        ratio = trace1_total / trace2_total
+    elif trace1_total > 0:
+        ratio = float("inf")  # trace2 has no time, trace1 is infinitely slower
+    else:
+        ratio = 1.0  # Both are zero
     gap_ms = trace1_total - trace2_total
 
     trace1_pattern = list(
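
The comment block above is the heart of this change: a per-call average ratio rewards whichever trace splits its work into more launches, while a total-time ratio compares actual wall-clock cost. A minimal standalone sketch of the difference, using made-up numbers in the same total_us/count shape as the trace1_agg/trace2_agg aggregates:

    # Hypothetical aggregates: trace1 runs one fused kernel,
    # trace2 runs the unfused equivalent as 4 separate launches.
    trace1_agg = {"total_us": 900.0, "count": 1}   # 900 us in 1 call
    trace2_agg = {"total_us": 1200.0, "count": 4}  # 1200 us across 4 calls

    trace1_avg = trace1_agg["total_us"] / trace1_agg["count"]  # 900 us per call
    trace2_avg = trace2_agg["total_us"] / trace2_agg["count"]  # 300 us per call

    print(trace1_avg / trace2_avg)  # 3.0 -- old metric: trace1 looks 3x slower
    print(trace1_agg["total_us"] / trace2_agg["total_us"])  # 0.75 -- trace1 is faster

On the new metric the fused trace correctly comes out ahead (0.75 < 1.0), where the old per-call metric reported it as 3x slower.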
@@ -446,6 +456,11 @@ def analyze_traces_aligned(
     )
     same_kernel_result = analyze_same_kernels_from_alignment(alignment.layer_alignments)
 
+    # Note: amd_kernels = trace1's kernels (filtered if phase_filter != "all")
+    # nvidia_kernels = trace2's kernels (filtered if phase_filter != "all")
+    # The variable names are misleading but trace1_* should use amd_kernels,
+    # and trace2_* should use nvidia_kernels to match the filtered kernel counts/totals.
+
     return {
         "metadata": {
             "amd_gpu": amd_trace.gpu_name,
@@ -462,10 +477,10 @@ def analyze_traces_aligned(
             "trace2_platform": trace2.platform,
             "trace2_gpu": trace2.gpu_name,
             "trace2_device": trace2.device_props,
-            "trace1_kernels": len(amd_trace.kernel_events),
-            "trace2_kernels": len(nvidia_trace.kernel_events),
-            "trace1_total_ms": sum(k.get("dur", 0) for k in amd_trace.kernel_events) / 1000,
-            "trace2_total_ms": sum(k.get("dur", 0) for k in nvidia_trace.kernel_events) / 1000,
+            "trace1_kernels": len(amd_kernels),
+            "trace2_kernels": len(nvidia_kernels),
+            "trace1_total_ms": sum(k.get("dur", 0) for k in amd_kernels) / 1000,
+            "trace2_total_ms": sum(k.get("dur", 0) for k in nvidia_kernels) / 1000,
             "phase": phase_filter,
             "trace1_layers": alignment.num_layers,
             "trace2_layers": alignment.num_layers,
@@ -579,7 +594,17 @@ def analyze_traces_aligned(
     trace2_total = trace2_agg["total_us"] / 1000
     trace1_count = int(trace1_agg["count"])
     trace2_count = int(trace2_agg["count"])
-    ratio = trace1_avg / trace2_avg if trace2_avg > 0 else 1
+    # Speedup: ratio of total times (not per-call averages)
+    # Shows how many times faster/slower trace1 is compared to trace2
+    # > 1.0 means trace1 is slower, < 1.0 means trace1 is faster
+    # Using total time instead of avg time per call because operations may have
+    # vastly different call counts (e.g., fused vs unfused operations)
+    if trace2_total > 0:
+        ratio = trace1_total / trace2_total
+    elif trace1_total > 0:
+        ratio = float("inf")  # trace2 has no time, trace1 is infinitely slower
+    else:
+        ratio = 1.0  # Both are zero
     gap_ms = trace1_total - trace2_total
 
     trace1_pattern = list(
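
The same guard appears here in analyze_traces_aligned. Pulled out in isolation (as a hypothetical helper, not a function in the package), the three branches behave like this:

    def total_time_ratio(trace1_total, trace2_total):
        """Mirror of the guarded ratio above (both totals in ms)."""
        if trace2_total > 0:
            return trace1_total / trace2_total
        elif trace1_total > 0:
            return float("inf")  # trace2 recorded no time at all
        return 1.0  # both traces are empty

    print(total_time_ratio(5.0, 2.0))  # 2.5  -- trace1 is 2.5x slower
    print(total_time_ratio(1.0, 4.0))  # 0.25 -- trace1 is 4x faster
    print(total_time_ratio(5.0, 0.0))  # inf
    print(total_time_ratio(0.0, 0.0))  # 1.0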
wafer_core-0.1.29.dist-info/METADATA → wafer_core-0.1.30.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-core
-Version: 0.1.29
+Version: 0.1.30
 Summary: Core utilities and environments for Wafer GPU kernel optimization
 Requires-Python: >=3.10
 Requires-Dist: aiohttp>=3.9.0
wafer_core-0.1.29.dist-info/RECORD → wafer_core-0.1.30.dist-info/RECORD

@@ -321,7 +321,7 @@ wafer_core/lib/rocprofiler/systems/sample/profiler.py,sha256=CYZPTzNXd48LoCfmY6h
 wafer_core/lib/trace_compare/PERFORMANCE.md,sha256=jkJh7ApZi8H7NKTcz8v0LNtwSFtIUqY88e3QbL749ww,3823
 wafer_core/lib/trace_compare/__init__.py,sha256=CyUPbPQDYhVLCFFA7S_jNSilG3OgqYjmHSKfR5X11go,1377
 wafer_core/lib/trace_compare/aligner.py,sha256=1S8Ob3RaEsIjN0HdqEx0yGsW5uf_lMrJVSH_MnZhKok,13788
-wafer_core/lib/trace_compare/analyzer.py,sha256=YkuOPA3HFX_7mNUEhE9CMOtEMGLQd12lvUkvqqeQF14,29698
+wafer_core/lib/trace_compare/analyzer.py,sha256=Ou_gooG027YVuYVF5oddAkMsObXrrPQLBPHUzSMA4Vg,31078
 wafer_core/lib/trace_compare/api.py,sha256=JSRTcd7eZK1Z8l18TFEiA5A8ENJS1TMz7oIiw1KBbAs,8796
 wafer_core/lib/trace_compare/architecture.py,sha256=8bqlAJQeJLBHblyXvFV-w55PIKiVQDPjDQZ8Jx4tuGg,2110
 wafer_core/lib/trace_compare/classifier.py,sha256=cYAmDW8S75N6cE3mJNZM-UKCJSX7rFP-8klVrukBvNQ,17504
@@ -697,6 +697,6 @@ wafer_core/utils/modal_execution/modal_app.py,sha256=VfS2cX8gHtnlPXemmMcEwDPeQdh
 wafer_core/utils/modal_execution/modal_config.py,sha256=7cGX9TGqilQ3qxI3OFGXV5orjtyRU-PEDOJ4vP2oxno,4421
 wafer_core/utils/modal_execution/modal_execution.py,sha256=gChjnV6jqA3A7IRP3DfvV5cSfm_MN0X4f7JZufXgdZE,24594
 wafer_core/utils/modal_execution/test_modal.py,sha256=_jqou_hrLs1Daf1590Pnb0a_lXMMa2rczAPpW9HpoNQ,8153
-wafer_core-0.1.29.dist-info/METADATA,sha256=Qjyx92KhI1joutpM8lF0G1zgPou-d8CdzWI80QQqKYg,1477
-wafer_core-0.1.29.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-wafer_core-0.1.29.dist-info/RECORD,,
+wafer_core-0.1.30.dist-info/METADATA,sha256=YuF3VyyP3tvmv2S-7E8epi1J2_1e2yXJfapS1uGQ0Zs,1477
+wafer_core-0.1.30.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+wafer_core-0.1.30.dist-info/RECORD,,