PyPI - wafer-cli - Versions diffs - 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl - Mend

wafer-cli 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

wafer/cli.py +196 -37
wafer/corpus.py +65 -5
wafer/specs_cli.py +157 -0
wafer/targets_cli.py +472 -0
wafer/targets_ops.py +29 -2
wafer/trace_compare.py +139 -48
{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/METADATA +1 -1
{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/RECORD +11 -9
{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/WHEEL +0 -0
{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/entry_points.txt +0 -0
{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/top_level.txt +0 -0

wafer/trace_compare.py CHANGED Viewed

@@ -6,19 +6,22 @@ All core logic is in wafer_core.lib.trace_compare.
 import sys
 from pathlib import Path
+from typing import Any
 import typer
+import json
+import sys
 from wafer_core.lib.trace_compare import (
-    analyze_fusion_differences,
-    analyze_traces,
+    analyze_trace_pair,
     format_csv,
-    format_fusion_csv,
-    format_fusion_json,
-    format_fusion_text,
     format_json,
     format_text,
+    ArchitectureType,
+    detect_architecture,
 )
+from wafer_core.lib.trace_compare.loader import StreamingMetadata
 def compare_traces(
@@ -30,6 +33,7 @@ def compare_traces(
     show_layers: bool = False,
     show_all: bool = False,
     show_stack_traces: bool = False,
+    recommendations: bool = False,
 ) -> None:
     """Compare two GPU traces and generate performance report.
@@ -52,21 +56,60 @@ def compare_traces(
         typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
-    # Analyze traces
-    # Only show progress messages for non-JSON formats (JSON needs clean stdout)
+    # Progress callback for JSON format (emits NDJSON to stdout)
+    def progress_callback(stage: str, fraction: float) -> None:
+        if output_format == 'json':
+            progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
+            print(progress_msg, file=sys.stdout, flush=True)
+        elif output_format != 'json':
+            percent = int(fraction * 100)
+            typer.echo(f"📊 {stage}: {percent}%", err=True)
+    # Metadata callback for JSON format (emits NDJSON with early GPU info)
+    def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
+        if output_format == 'json':
+            metadata_msg = json.dumps({
+                "type": "metadata",
+                "trace1": {
+                    "platform": meta1.platform,
+                    "gpu": meta1.gpu_name,
+                    "file_size_mb": round(meta1.file_size_mb, 1),
+                },
+                "trace2": {
+                    "platform": meta2.platform,
+                    "gpu": meta2.gpu_name,
+                    "file_size_mb": round(meta2.file_size_mb, 1),
+                },
+            })
+            print(metadata_msg, file=sys.stdout, flush=True)
+        else:
+            typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
+            typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
+    # Analyze traces using unified API
     if output_format != 'json':
         typer.echo("📊 Loading traces...")
-    # Determine how many stack traces to collect
-    max_stacks = 0 if (show_stack_traces and show_all) else (3 if show_stack_traces else 3)
     try:
-        results = analyze_traces(
+        result_obj = analyze_trace_pair(
             trace1,
             trace2,
-            phase_filter=phase,
-            max_stacks=max_stacks,
+            phase=phase,
+            include_stacks=True,
+            on_progress=progress_callback,
+            on_metadata=metadata_callback,
         )
+        results = {
+            "metadata": result_obj.metadata,
+            "operations": result_obj.operations,
+            "layers": result_obj.layers,
+            "warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
+            "architecture": result_obj.architecture.value,
+            "layer_alignments": result_obj.layer_alignments,
+            "fusion_analysis": result_obj.fusion_analysis,
+            "same_kernel_analysis": result_obj.same_kernel_analysis,
+        }
     except ValueError as e:
         typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
@@ -74,17 +117,26 @@ def compare_traces(
         typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
-    # Show loading confirmation
     if output_format != 'json':
         meta = results["metadata"]
-        # Determine which trace is AMD and which is NVIDIA
         if meta['trace1_platform'] == 'AMD':
             amd_gpu, nvidia_gpu = meta['trace1_gpu'], meta['trace2_gpu']
         else:
             amd_gpu, nvidia_gpu = meta['trace2_gpu'], meta['trace1_gpu']
         typer.echo(f"✅ Loaded: AMD ({amd_gpu}) vs NVIDIA ({nvidia_gpu})")
+        # Display warnings
+        warnings = results.get("warnings", [])
+        if warnings:
+            typer.echo()
+            for warning in warnings:
+                icon = "❌" if warning["severity"] == "error" else "⚠️" if warning["severity"] == "warning" else "ℹ️"
+                typer.secho(f"{icon}  {warning['message']}", fg=typer.colors.YELLOW if warning["severity"] == "warning" else typer.colors.BLUE)
+                if warning.get("suggestion"):
+                    typer.secho(f"   Suggestion: {warning['suggestion']}", fg=typer.colors.BLUE)
     typer.echo()
     # Generate output based on format
     if output_format == "text":
         output_str = format_text(results, show_layers=show_layers, show_all=show_all, show_stack_traces=show_stack_traces)
@@ -108,21 +160,23 @@ def compare_traces(
         typer.echo(output_str)
-def compare_fusion(
+def compare_align(
     trace1: Path,
     trace2: Path,
     output: Path | None = None,
-    format_type: str = "text",
-    min_group_size: int = 50,
+    output_format: str = "json",
+    phase: str = "all",
+    layer: int | None = None,
 ) -> None:
-    """Analyze kernel fusion differences between AMD and NVIDIA traces.
+    """Align kernels at layer level for exact kernel-to-kernel comparison.
     Args:
         trace1: Path to first trace file (AMD or NVIDIA)
         trace2: Path to second trace file (AMD or NVIDIA)
         output: Optional output file path (default: stdout)
-        format_type: Output format ('text', 'csv', or 'json')
-        min_group_size: Minimum correlation group size to analyze
+        output_format: Output format ('json' only for now)
+        phase: Filter by phase ('all', 'prefill', or 'decode')
+        layer: Focus on specific layer number (optional)
     """
     # Validate files exist
     if not trace1.exists():
@@ -133,49 +187,86 @@ def compare_fusion(
         typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
-    # Analyze fusion
-    # Only show progress messages for non-JSON formats (JSON needs clean stdout)
-    if format_type != 'json':
+    # Progress callback for JSON format (emits NDJSON to stdout)
+    def progress_callback(stage: str, fraction: float) -> None:
+        if output_format == 'json':
+            progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
+            print(progress_msg, file=sys.stdout, flush=True)
+        else:
+            percent = int(fraction * 100)
+            typer.echo(f"📊 {stage}: {percent}%", err=True)
+    # Metadata callback for JSON format
+    def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
+        if output_format == 'json':
+            metadata_msg = json.dumps({
+                "type": "metadata",
+                "trace1": {
+                    "platform": meta1.platform,
+                    "gpu": meta1.gpu_name,
+                    "file_size_mb": round(meta1.file_size_mb, 1),
+                },
+                "trace2": {
+                    "platform": meta2.platform,
+                    "gpu": meta2.gpu_name,
+                    "file_size_mb": round(meta2.file_size_mb, 1),
+                },
+            })
+            print(metadata_msg, file=sys.stdout, flush=True)
+        else:
+            typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
+            typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
+    # Analyze traces using unified API
+    if output_format != 'json':
         typer.echo("📊 Loading traces...")
     try:
-        results = analyze_fusion_differences(
+        result_obj = analyze_trace_pair(
             trace1,
             trace2,
-            min_group_size=min_group_size,
+            phase=phase,
+            include_stacks=True,
+            on_progress=progress_callback,
+            on_metadata=metadata_callback,
         )
+        results = {
+            "metadata": result_obj.metadata,
+            "layer_alignments": result_obj.layer_alignments or [],
+            "fusion_analysis": result_obj.fusion_analysis or {},
+            "same_kernel_analysis": result_obj.same_kernel_analysis or {},
+            "operations": result_obj.operations,
+            "layers": result_obj.layers,
+            "warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
+            "architecture": result_obj.architecture.value,
+        }
+        if layer is not None:
+            results["layer_alignments"] = [
+                la for la in results["layer_alignments"] if la.get("layer") == layer
+            ]
+    except ValueError as e:
+        typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
+        raise typer.Exit(1)
     except Exception as e:
-        typer.secho(
-            f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True
-        )
+        typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
         import traceback
         traceback.print_exc()
         raise typer.Exit(1)
-    # Show loading confirmation
-    if format_type != 'json':
+    if output_format != 'json':
         meta = results["metadata"]
-        # Note: fusion analyzer always uses trace1=AMD, trace2=NVIDIA
-        typer.echo(f"✅ Loaded: {meta['trace1_gpu']} vs {meta['trace2_gpu']}")
-        typer.echo(
-            f"Found {meta['trace1_correlation_groups']} trace1 groups and "
-            f"{meta['trace2_correlation_groups']} trace2 groups with ≥{min_group_size} kernels"
-        )
-        typer.echo(f"✅ Matched {meta['matched_groups']} correlation groups")
+        typer.echo(f"✅ Loaded: {meta.get('amd_gpu', 'Unknown')} vs {meta.get('nvidia_gpu', 'Unknown')}")
+        typer.echo(f"✅ Found {len(results['layer_alignments'])} layers")
         typer.echo()
-    # Generate output
-    if format_type == "text":
-        output_str = format_fusion_text(results)
-    elif format_type == "csv":
-        output_str = format_fusion_csv(results)
-    elif format_type == "json":
-        output_str = format_fusion_json(results)
+    if output_format == "json":
+        output_str = format_json(results)
     else:
-        typer.secho(f"❌ Unknown format: {format_type}", fg=typer.colors.RED, err=True)
+        typer.secho(f"❌ Format {output_format} not yet supported for align command. Use 'json'.", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
-    # Write output
     if output:
         output.write_text(output_str)
         typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)

{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-cli
-Version: 0.2.25
+Version: 0.2.27
 Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown

{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/RECORD RENAMED Viewed

@@ -6,10 +6,10 @@ wafer/api_client.py,sha256=i_Az2b2llC3DSW8yOL-BKqa7LSKuxOr8hSN40s-oQXY,6313
 wafer/auth.py,sha256=dwss_se5P-FFc9IN38q4kh_dBrA6k-CguDBkivgcdj0,14003
 wafer/autotuner.py,sha256=41WYP41pTDvMijv2h42vm89bcHtDMJXObDlWmn6xpFU,44416
 wafer/billing.py,sha256=hEEwtrtIsbPQ3lLJNcyTLMsapUbcuvcVW_e9_0SxzVo,7199
-wafer/cli.py,sha256=vboIOEGLWrNUejSWfO0bcQ0IJOAR6Inva7r7PeYb6jI,277592
+wafer/cli.py,sha256=zuVZhPdML5AOBtLUqLwAwjl8XMNe9EwQkffZxtBGLx4,282748
 wafer/cli_instructions.py,sha256=bziUKDNDAXABVMvKPLEMXm-hFSD2TcFSh-FKRYa949k,4693
 wafer/config.py,sha256=h5Eo9_yfWqWGoPNdVQikI9GoZVUeysunSYiixf1mKcw,3411
-wafer/corpus.py,sha256=B7xHNP_ssGbkL0DpXeXisycm_SxrLv5s4oss735GRWI,22567
+wafer/corpus.py,sha256=CY9T7wXENNDJxnrtI-XsQmXeptrFfKG4x-lngrc9_3s,24748
 wafer/evaluate.py,sha256=HMFQD-uwC6Wky1t_0JxYZaoHWgLaTBkjxOxgpZVnGrc,190519
 wafer/global_config.py,sha256=fhaR_RU3ufMksDmOohH1OLeQ0JT0SDW1hEip_zaP75k,11345
 wafer/gpu_run.py,sha256=TwqXy72T7f2I7e6n5WWod3xgxCPnDhU0BgLsB4CUoQY,9716
@@ -23,11 +23,13 @@ wafer/problems.py,sha256=ce2sy10A1nnNUG3VGsseTS8jL7LZsku4dE8zVf9JHQ4,11296
 wafer/rocprof_compute.py,sha256=n_yOGZaFbOXna_ghhmYWXeyUoSabgH4KkjlYq38DlHo,19888
 wafer/rocprof_sdk.py,sha256=0Q7Ye6dUfa1anFZbqKc21rItgqva8V8VIZoSB7wqbmA,10085
 wafer/rocprof_systems.py,sha256=4IWbMcbYk1x_8iS7P3FC_u5sgH6EXADCtR2lV9id80M,18629
+wafer/specs_cli.py,sha256=frMEKwMflxVNpFlAuxprmr33ZZ1Oeh2lB0KWZ4oZWzw,4360
 wafer/ssh_keys.py,sha256=MxiHlSm6wuDUFzkOQtx5K7OIbx_a6bXxE-m8OpwLx98,8130
 wafer/target_lock.py,sha256=SDKhNzv2N7gsphGflcNni9FE5YYuAMuEthngAJEo4Gs,7809
 wafer/targets.py,sha256=9r-iRWoKSH5cQl1LcamaX-T7cNVOg99ngIm_hlRk-qU,26922
-wafer/targets_ops.py,sha256=jN1oIBx0mutxRNE9xpIc7SaBxPkVmOyus2eqn0kEKNI,21475
-wafer/trace_compare.py,sha256=IBVSGI8u5A10haDzL4eQ0R24fM1G_dd1F3-4iEkG1EQ,6349
+wafer/targets_cli.py,sha256=Oe3e02rSXeNrMbe_Qv9DNfQ8dEOKodtU7BbQQWxlNwA,16348
+wafer/targets_ops.py,sha256=wLPyq55H_wz0wEAEg8KFLYs9LIIyiVIphcsXD2NLa-E,22623
+wafer/trace_compare.py,sha256=COuxxKY874DteOSLUvJuJFREPMBSybq9dtANi3ATsg4,10803
 wafer/tracelens.py,sha256=g9ZIeFyNojZn4uTd3skPqIrRiL7aMJOz_-GOd3aiyy4,7998
 wafer/wevin_cli.py,sha256=eo1ETsXIsCftXSG5AxEYYZipNGcXayKyIevs5F6MjXg,26140
 wafer/workspaces.py,sha256=J-TXGwHXSZlzRWCew63KNvk6HLJ-zTSELRgzjryTkMk,35710
@@ -38,8 +40,8 @@ wafer/templates/optimize_kernel.py,sha256=4-MaKm_C9BQHQEllrNLLYkcdhJpcj6D-8zbJ4F
 wafer/templates/optimize_kernelbench.py,sha256=T3co9Y9eSLWDrZG66gwQVFMdnGVoyUQos-TxnMMBLL8,3747
 wafer/templates/trace_analyze.py,sha256=B7CiRlsokERzBjLL-k49kGjpU2zlJZqzTE05xbRS1WI,2878
 wafer/tests/test_eval_cli_parity.py,sha256=SGmaj2NGBZ7GdDF53bXsECvQbV21iHZw8YeL_MJOLk0,7206
-wafer_cli-0.2.25.dist-info/METADATA,sha256=sJEMfe-FTEqIj_Ij9gYek8BMB3_MD1vbMvJ9VhK8Qow,2799
-wafer_cli-0.2.25.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-wafer_cli-0.2.25.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
-wafer_cli-0.2.25.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
-wafer_cli-0.2.25.dist-info/RECORD,,
+wafer_cli-0.2.27.dist-info/METADATA,sha256=Atrnv-RhoThH0JF_gczLjsQluDb3ycEsvRwn16RDf8k,2799
+wafer_cli-0.2.27.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+wafer_cli-0.2.27.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
+wafer_cli-0.2.27.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
+wafer_cli-0.2.27.dist-info/RECORD,,

{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/WHEEL RENAMED Viewed

File without changes

{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/top_level.txt RENAMED Viewed

File without changes

wafer-cli 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl

wafer-cli 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl