wafer-core 0.1.25__py3-none-any.whl → 0.1.26__py3-none-any.whl
- wafer_core/lib/trace_compare/__init__.py +32 -0
- wafer_core/lib/trace_compare/analyzer.py +339 -0
- wafer_core/lib/trace_compare/classifier.py +192 -0
- wafer_core/lib/trace_compare/formatter.py +951 -0
- wafer_core/lib/trace_compare/fusion_analyzer.py +890 -0
- wafer_core/lib/trace_compare/loader.py +336 -0
- wafer_core/problem_config.py +3 -3
- wafer_core/rollouts/agent_presets/rlm_01_01.py +2 -2
- wafer_core/rollouts/dtypes.py +18 -3
- wafer_core/rollouts/providers/anthropic.py +35 -3
- wafer_core/utils/kernel_utils/defense.py +10 -0
- wafer_core/utils/kernel_utils/targets/config.py +10 -0
- {wafer_core-0.1.25.dist-info → wafer_core-0.1.26.dist-info}/METADATA +1 -1
- {wafer_core-0.1.25.dist-info → wafer_core-0.1.26.dist-info}/RECORD +15 -9
- {wafer_core-0.1.25.dist-info → wafer_core-0.1.26.dist-info}/WHEEL +0 -0
wafer_core/lib/trace_compare/loader.py
ADDED
@@ -0,0 +1,336 @@
+"""Trace loading and parsing logic.
+
+Loads JSON trace files from AMD/NVIDIA profilers and extracts kernel execution data,
+Python call stacks, CPU operator mappings, and layer correlations.
+"""
+
+import bisect
+import json
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+
+import pandas as pd
+
+from .classifier import classify
+
+
+def extract_layer_mapping(events: list[dict[str, Any]], platform: str) -> dict[int, int]:
+    """Extract correlation ID to layer number mapping.
+
+    vLLM's execution graph creates large correlation groups for full transformer layers.
+    Each layer's forward pass (norm + attention + FFN) gets grouped under one correlation ID,
+    containing 200-400 kernels depending on batch size and sequence length.
+
+    We identify layers as correlation groups with many kernels (70+), which filters out
+    individual operations like sampling, logit processing, etc.
+
+    Args:
+        events: List of trace events
+        platform: 'AMD' or 'NVIDIA'
+
+    Returns:
+        Dict mapping correlation ID to layer number
+    """
+    # Group kernels by correlation ID
+    correlation_groups = defaultdict(
+        lambda: {"count": 0, "has_attention": False, "has_ffn": False}
+    )
+
+    for ev in events:
+        if ev.get("cat") != "kernel":
+            continue
+
+        corr_id = ev.get("args", {}).get("correlation")
+        if corr_id is None:
+            continue
+
+        kernel_name = ev.get("name", "").lower()
+
+        # Track what operations this correlation contains
+        correlation_groups[corr_id]["count"] += 1
+        if "attention" in kernel_name or "fmha" in kernel_name:
+            correlation_groups[corr_id]["has_attention"] = True
+        if any(x in kernel_name for x in ["cijk_", "nvjet", "wvsplitk", "gemm"]):
+            correlation_groups[corr_id]["has_ffn"] = True
+
+    # Map correlation IDs to layer numbers
+    # Transformer layers have many kernels AND contain both attention and FFN ops
+    correlation_to_layer = {}
+    layer_num = 0
+
+    for corr_id in sorted(correlation_groups.keys()):
+        group = correlation_groups[corr_id]
+
+        # Identify complete transformer layers by their characteristics:
+        # - Has attention operations (self-attention or cross-attention)
+        # - Has FFN operations (feed-forward network)
+        # - Has sufficient kernel count (70+): typical transformer block has ~80-100 kernels
+        #   including attention QKV projections, softmax, output projection, FFN layers,
+        #   normalization, and elementwise ops. This threshold filters out:
+        #   - Individual operations (1-10 kernels)
+        #   - Sampling/generation steps (20-40 kernels)
+        #   - Partial layer executions
+        is_layer = (
+            group["count"] >= 70 and group["has_attention"] and group["has_ffn"]
+        )
+
+        if is_layer:
+            correlation_to_layer[corr_id] = layer_num
+            layer_num += 1
+
+    return correlation_to_layer
+
+
+def _build_python_stack_index(
+    events: list[dict[str, Any]],
+) -> tuple[list[tuple[int, int, int, int | None, str]], dict[int, dict[str, Any]]]:
+    """Build Python call stack index for kernels.
+
+    Args:
+        events: List of trace events
+
+    Returns:
+        Tuple of (python_intervals, python_by_id)
+    """
+    python_by_id: dict[int, dict[str, Any]] = {}
+    python_intervals: list[tuple[int, int, int, int | None, str]] = []
+
+    for ev in events:
+        if ev.get("cat") == "python_function":
+            py_id = ev.get("args", {}).get("Python id")
+            name = ev["name"]
+            ts_start = ev["ts"]
+            ts_end = ts_start + ev.get("dur", 0)
+            duration = ev.get("dur", 0)
+            parent_id = ev.get("args", {}).get("Python parent id")
+
+            python_intervals.append((ts_start, ts_end, duration, py_id, name))
+
+            if py_id is not None:
+                python_by_id[py_id] = {
+                    "name": name,
+                    "parent_id": parent_id,
+                    "ts_start": ts_start,
+                    "ts_end": ts_end,
+                    "duration": duration,
+                }
+
+    # Sort by start time for efficient binary search
+    python_intervals.sort()
+
+    return python_intervals, python_by_id
+
+
+def _get_python_stack_full(
+    timestamp: int,
+    python_intervals: list[tuple[int, int, int, int | None, str]],
+    python_by_id: dict[int, dict[str, Any]],
+) -> tuple[str | None, list[str]]:
+    """Get full Python call stack for a kernel launch.
+
+    Args:
+        timestamp: Kernel launch timestamp
+        python_intervals: Sorted list of Python function intervals
+        python_by_id: Mapping of Python ID to function info
+
+    Returns:
+        Tuple of (summary_string, full_stack_list)
+    """
+    # Binary search for Python functions active at this timestamp
+    idx = bisect.bisect_right(
+        python_intervals, (timestamp, float("inf"), float("inf"), None, "")
+    )
+
+    # Find active functions
+    active_funcs = []
+    for i in range(idx - 1, max(0, idx - 1000), -1):
+        ts_start, ts_end, duration, py_id, name = python_intervals[i]
+        if ts_start <= timestamp <= ts_end:
+            active_funcs.append((duration, py_id, name))
+        if ts_end < timestamp - 1000000:  # 1 second before
+            break
+
+    if not active_funcs:
+        return None, []
+
+    # Get the innermost (most specific) function
+    active_funcs.sort()
+    leaf_duration, leaf_id, leaf_name = active_funcs[0]
+
+    # Walk up parent chain to get FULL stack
+    full_stack = []
+    current_id = leaf_id
+    visited = set()
+
+    while (
+        current_id is not None
+        and current_id not in visited
+        and current_id in python_by_id
+    ):
+        func = python_by_id[current_id]
+        name = func["name"]
+        full_stack.append(name)
+
+        visited.add(current_id)
+        current_id = func["parent_id"]
+
+        # Safety limit: prevent infinite loops from circular parent references
+        # and bound memory usage. 50 frames is deeper than typical Python stacks.
+        if len(full_stack) >= 50:
+            break
+
+    # Reverse so it's outermost -> innermost
+    full_stack.reverse()
+
+    # Create summary for text output: show the most informative vLLM/model function
+    summary = None
+    vllm_funcs = [
+        f
+        for f in full_stack
+        if any(x in f.lower() for x in ["vllm/", "model", "<eval_with_key>"])
+    ]
+
+    if vllm_funcs:
+        # Get innermost vLLM function (most specific)
+        summary = vllm_funcs[-1]
+
+        # Check if it's a CUDA graph - add annotation
+        if any("torch/cuda/graphs" in f for f in full_stack):
+            # Shorten if too long
+            if len(summary) > 45:
+                parts = summary.split("/")[-1]
+                summary = "vllm/..." + parts
+            summary = f"{summary} [CUDA graph]"
+        elif len(summary) > 53:
+            parts = summary.split("/")[-1]
+            summary = "vllm/..." + parts
+    else:
+        # Fallback to innermost function
+        summary = leaf_name
+
+    return summary, full_stack
+
+
+def load_trace(
+    file_path: str | Path,
+) -> tuple[str, str, dict[str, Any], pd.DataFrame, dict[tuple[str, str], set[str]], dict[int, int]]:
+    """Load trace and return platform info, device properties, kernels, patterns, and layer mapping.
+
+    Args:
+        file_path: Path to JSON trace file
+
+    Returns:
+        Tuple of (platform, gpu_name, device_props, kernel_df, kernel_patterns, layer_mapping)
+    """
+    with open(file_path, "rb") as f:
+        trace = json.load(f)
+
+    props = trace.get("deviceProperties", [{}])[0]
+    is_amd = trace.get("roctracer_version") or props.get("warpSize") == 64
+    platform = "AMD" if is_amd else "NVIDIA"
+    gpu_name = props.get("name", "MI300X" if is_amd else "Unknown GPU")
+
+    # Extract relevant device properties
+    device_props = {
+        "name": gpu_name,
+        "compute_capability": f"{props.get('computeMajor', 0)}.{props.get('computeMinor', 0)}",
+        "total_memory_gb": props.get("totalGlobalMem", 0) / (1024**3),
+        "sm_count": props.get("numSms", 0),
+        "warp_size": props.get("warpSize", 32),
+        "max_threads_per_block": props.get("maxThreadsPerBlock", 0),
+        "shared_mem_per_block_kb": props.get("sharedMemPerBlock", 0) / 1024,
+    }
+
+    events = trace.get("traceEvents", [])
+
+    # Build mapping: external_id -> CPU operator name
+    external_to_cpu = {}
+    for ev in events:
+        if ev.get("cat") == "cpu_op":
+            ext_id = ev.get("args", {}).get("External id")
+            cpu_op_name = ev.get("name", "")
+            if ext_id is not None:
+                external_to_cpu[ext_id] = cpu_op_name
+
+    # Build Python call stack index for kernels without External IDs
+    python_intervals, python_by_id = _build_python_stack_index(events)
+
+    # Extract phases
+    phases = []
+    for ev in events:
+        if ev.get("cat") == "user_annotation" and ev.get("name", "").startswith(
+            "execute_context"
+        ):
+            name = ev["name"]
+            # Parse execute_context_X(TOKENS)_generation_Y(Y)
+            # We want the TOKENS from execute_context, not the generation number
+            tokens = 0
+            parts = name.split("_")
+            for i, p in enumerate(parts):
+                # Look for execute_context_X(TOKENS) specifically
+                if i > 0 and parts[i-1] == "context" and "(" in p and ")" in p:
+                    try:
+                        tokens = int(p.split("(")[1].split(")")[0])
+                        break  # Stop after finding context tokens
+                    except Exception:
+                        pass
+            is_prefill = tokens >= 1024 and "generation_0" in name
+            phases.append(
+                {
+                    "type": "prefill" if is_prefill else "decode",
+                    "ts_start": ev["ts"],
+                    "ts_end": ev["ts"] + ev["dur"],
+                }
+            )
+
+    # Extract layer mapping from correlation IDs
+    layer_mapping = extract_layer_mapping(events, platform)
+
+    kernel_data = []
+    kernel_patterns: dict[tuple[str, str], set[str]] = defaultdict(set)
+
+    for ev in events:
+        if ev.get("cat") != "kernel":
+            continue
+        name, dur, ts = ev["name"], ev.get("dur", 0), ev["ts"]
+        corr_id = ev.get("args", {}).get("correlation")
+        ext_id = ev.get("args", {}).get("External id")
+
+        phase = "decode"
+        for p in phases:
+            if p["ts_start"] <= ts <= p["ts_end"]:
+                phase = p["type"]
+                break
+
+        op, pattern = classify(name, platform)
+        kernel_patterns[(op.value, phase)].add(pattern)
+
+        # Assign layer number from correlation ID
+        layer = layer_mapping.get(corr_id) if corr_id is not None else None
+
+        # Get CPU operator name from external ID, or fallback to Python stack
+        cpu_op = external_to_cpu.get(ext_id) if ext_id is not None else None
+        python_stack: list[str] = []
+
+        # If no CPU op via External ID, try Python stack trace
+        if cpu_op is None:
+            cpu_op, python_stack = _get_python_stack_full(
+                ts, python_intervals, python_by_id
+            )
+
+        kernel_data.append(
+            {
+                "name": name,
+                "dur_us": dur,
+                "phase": phase,
+                "op": op.value,
+                "pattern": pattern,
+                "layer": layer,
+                "correlation": corr_id,
+                "cpu_op": cpu_op,
+                "python_stack": python_stack,  # Full stack for JSON output
+            }
+        )
+
+    return platform, gpu_name, device_props, pd.DataFrame(kernel_data), dict(kernel_patterns), layer_mapping
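For orientation, a minimal usage sketch of the new loader. The trace path is a placeholder (not a file shipped with the package); the DataFrame columns are the keys of the kernel_data dicts built above:

    from wafer_core.lib.trace_compare.loader import load_trace

    platform, gpu_name, props, kernels, patterns, layers = load_trace("trace.json")

    # GPU summary from deviceProperties
    print(platform, gpu_name, props["sm_count"], props["warp_size"])
    # Kernel time broken down by phase (prefill/decode) and classified op
    print(kernels.groupby(["phase", "op"])["dur_us"].sum())
    # Correlation groups recognized as full transformer layers
    print(f"{len(layers)} correlation groups mapped to transformer layers")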
wafer_core/problem_config.py
CHANGED
@@ -84,7 +84,7 @@ class ProblemConfig:
     benchmarks: list[dict[str, Any]]
 
     # Optional with defaults
-    model: str = "claude-
+    model: str = "claude-opus-4-5-20251101"
     temperature: float = 0.2
     max_tokens: int = 8192
     max_turns: int = 10
@@ -219,7 +219,7 @@ def _parse_config(data: dict[str, Any], base_dir: Path) -> tuple[ProblemConfig |
         reference_code=reference_code,
         tests=tests,
         benchmarks=benchmarks,
-        model=data.get("model", "claude-
+        model=data.get("model", "claude-opus-4-5-20251101"),
         temperature=data.get("temperature", 0.2),
         max_tokens=data.get("max_tokens", 8192),
         max_turns=data.get("max_turns", 10),
@@ -269,7 +269,7 @@ def create_problem_config_from_cli(
         reference_code=reference_code,
         tests=tests,
         benchmarks=benchmarks or tests,  # Use tests as benchmarks if not specified
-        model=kwargs.get("model", "claude-
+        model=kwargs.get("model", "claude-opus-4-5-20251101"),
        temperature=kwargs.get("temperature", 0.2),
         max_tokens=kwargs.get("max_tokens", 8192),
         max_turns=kwargs.get("max_turns", 10),

wafer_core/rollouts/agent_presets/rlm_01_01.py
CHANGED
@@ -119,7 +119,7 @@ FINAL(42)
 
 config = AgentPresetConfig(
     name="rlm",
-    model="anthropic/claude-
+    model="anthropic/claude-opus-4-5-20251101",
     env="repl",  # Uses REPLEnvironment
     thinking=True,
     system_prompt=RLM_TOOL_SYSTEM_PROMPT,
@@ -128,7 +128,7 @@ config = AgentPresetConfig(
 # Variant for message-parsing mode
 config_block_mode = AgentPresetConfig(
     name="rlm_blocks",
-    model="anthropic/claude-
+    model="anthropic/claude-opus-4-5-20251101",
     env="repl_blocks",  # Uses MessageParsingREPLEnvironment
     thinking=True,
     system_prompt=RLM_BLOCK_SYSTEM_PROMPT,
wafer_core/rollouts/dtypes.py
CHANGED
@@ -1238,6 +1238,12 @@ class Endpoint(JsonSerializable):
     api_base: str = ""
     api_key: str = ""
     oauth_token: str = ""  # OAuth bearer token (takes precedence over api_key for Anthropic)
+    # TODO: Callbacks on a frozen dataclass are a code smell. This exists because wafer-core
+    # can't depend on wafer-cli (where the Supabase refresh logic lives). A cleaner approach
+    # would be a TokenProvider protocol that Endpoint delegates to, keeping the dataclass pure.
+    api_key_refresh: Callable[[], Awaitable[str | None]] | None = field(
+        default=None, repr=False, compare=False
+    )
     is_claude_code_api_key: bool = (
         False  # API key created via Claude Code OAuth (requires special headers)
     )
@@ -1300,6 +1306,7 @@ class Endpoint(JsonSerializable):
             exclude_secrets: If True (default), omits api_key and oauth_token.
         """
        d = asdict(self)
+        d.pop("api_key_refresh", None)  # Callable, not serializable
         if exclude_secrets:
             d.pop("api_key", None)
             d.pop("oauth_token", None)
@@ -1307,7 +1314,11 @@ class Endpoint(JsonSerializable):
 
     @classmethod
     def from_dict(
-        cls,
+        cls,
+        data: dict[str, Any],
+        api_key: str = "",
+        oauth_token: str = "",
+        api_key_refresh: "Callable[[], Awaitable[str | None]] | None" = None,
     ) -> "Endpoint":
         """Deserialize from dict, injecting secrets at runtime.
 
@@ -1315,12 +1326,16 @@ class Endpoint(JsonSerializable):
             data: Dict from to_dict()
             api_key: API key to inject (not stored in session)
             oauth_token: OAuth token to inject (not stored in session)
+            api_key_refresh: Callback to refresh api_key mid-session (not stored)
         """
-        # Remove secrets if present (they shouldn't be, but be safe)
+        # Remove secrets/callables if present (they shouldn't be, but be safe)
         data = data.copy()
         data.pop("api_key", None)
         data.pop("oauth_token", None)
-
+        data.pop("api_key_refresh", None)
+        return cls(
+            **data, api_key=api_key, oauth_token=oauth_token, api_key_refresh=api_key_refresh
+        )
 
 
 @dataclass(frozen=True)
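A minimal sketch of the TokenProvider protocol the TODO above floats as the cleaner design; this is an assumption about a possible future shape, not an API in this release:

    from typing import Protocol

    class TokenProvider(Protocol):
        """Supplies a fresh API key on demand (hypothetical interface)."""

        async def fresh_api_key(self) -> str | None: ...

    # Endpoint would delegate to a TokenProvider instead of storing a raw callable,
    # keeping the frozen dataclass free of function-typed fields and making
    # to_dict()/from_dict() symmetric without the pop() special case above.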
wafer_core/rollouts/providers/anthropic.py
CHANGED
@@ -725,9 +725,16 @@ async def rollout_anthropic(
             oauth_token = fresh_token
         # If refresh failed, continue with existing token - it might still work
 
+    # Get fresh wafer proxy token if refresh callback is available
+    api_key = actor.endpoint.api_key
+    if actor.endpoint.api_key_refresh:
+        fresh_key = await actor.endpoint.api_key_refresh()
+        if fresh_key:
+            api_key = fresh_key
+
     client = _create_anthropic_client(
         oauth_token=oauth_token,
-        api_key=
+        api_key=api_key,
         api_base=actor.endpoint.api_base,
         max_retries=actor.endpoint.max_retries,
         timeout=actor.endpoint.timeout,
@@ -973,7 +980,7 @@ async def rollout_anthropic(
                     f"Model not found: {e}\nCheck your model ID is correct."
                 ) from e
 
-            #
+            # Try to refresh token and retry once on auth errors
            if isinstance(e, anthropic.AuthenticationError):
                 if oauth_token and attempt == 0:
                     # Emit retry event for OAuth refresh
@@ -993,12 +1000,37 @@ async def rollout_anthropic(
                     await client.close()
                     client = _create_anthropic_client(
                         oauth_token=oauth_token,
-                        api_key=
+                        api_key=api_key,
                         api_base=actor.endpoint.api_base,
                         max_retries=actor.endpoint.max_retries,
                         timeout=actor.endpoint.timeout,
                     )
                     continue
+
+                # Wafer proxy token refresh (Supabase JWTs expire after ~1hr)
+                if actor.endpoint.api_key_refresh and attempt == 0:
+                    await on_chunk(
+                        RetryStart(
+                            attempt=1,
+                            max_attempts=2,
+                            delay_seconds=0,
+                            error_message="Wafer proxy token expired, refreshing",
+                            provider="anthropic",
+                        )
+                    )
+                    fresh_key = await actor.endpoint.api_key_refresh()
+                    if fresh_key and fresh_key != api_key:
+                        api_key = fresh_key
+                        await client.close()
+                        client = _create_anthropic_client(
+                            oauth_token=oauth_token,
+                            api_key=api_key,
+                            api_base=actor.endpoint.api_base,
+                            max_retries=actor.endpoint.max_retries,
+                            timeout=actor.endpoint.timeout,
+                        )
+                        continue
+
             raise FatalEvalError(
                 f"Authentication failed: {e}\nCheck your API key or OAuth token."
             ) from e
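For context, a hedged sketch of how a caller might wire the new hook through Endpoint.from_dict. The refresh body is a stub and the endpoint dict is illustrative; the real Supabase exchange lives in wafer-cli, outside this package:

    from wafer_core.rollouts.dtypes import Endpoint

    async def refresh_wafer_token() -> str | None:
        # Stub: exchange a stored refresh token for a fresh short-lived JWT.
        return None

    endpoint = Endpoint.from_dict(
        {"api_base": "https://proxy.example/v1"},  # illustrative to_dict() output
        api_key="initial-jwt",                     # injected at runtime, never stored
        api_key_refresh=refresh_wafer_token,
    )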
wafer_core/utils/kernel_utils/defense.py
CHANGED
@@ -12,6 +12,16 @@ Attack types defended against:
 5. Monkey-patching - Replacing CUDA timing functions with fake implementations
 
 Reference: "Hacks and Defenses in Automatic GPU Kernel Generation" by Jiwei Li (Dec 2025)
+
+TODO: Memory guard buffers (from CUDA-L2's zero_one_correctness_check.py) — wrap
+input/output tensors with guard regions and check for out-of-bounds writes after
+kernel execution. Catches shared memory overflow and buffer overrun at the memory
+boundary, rather than inferring from output non-determinism.
+
+TODO: Exact correctness for GEMM kernels (from CUDA-L2) — use {0,1} input matrices
+where FP16 results ≤2048 are exactly representable, enabling zero-tolerance
+validation (torch.equal instead of torch.allclose). Eliminates the "bounded garbage
+passes tolerance check" failure mode for matmul kernels entirely.
 """
 
 import random
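The second TODO is concrete enough to sketch. With {0,1} inputs every partial sum is an integer, and FP16 represents integers exactly up to 2048 (11-bit significand), so for K <= 2048 a correct kernel must match the reference bit for bit. A minimal sketch, assuming a kernel_fn with an (A, B) -> C matmul signature:

    import torch

    def exact_gemm_check(kernel_fn, m=1024, n=1024, k=1024) -> bool:
        assert k <= 2048, "keep integer dot products exactly representable in FP16"
        a = torch.randint(0, 2, (m, k), device="cuda").half()  # {0,1} inputs
        b = torch.randint(0, 2, (k, n), device="cuda").half()
        ref = (a.float() @ b.float()).half()  # integer-valued, exact after the cast
        out = kernel_fn(a, b)
        # Zero tolerance: any mismatch is a real bug, not rounding noise.
        return torch.equal(out, ref)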
wafer_core/utils/kernel_utils/targets/config.py
CHANGED
@@ -21,6 +21,12 @@ if TYPE_CHECKING:
     from wafer_core.utils.kernel_utils.deployment import DeploymentConfig
 
 
+# TODO: Split BaremetalTarget into BaremetalTarget (persistent servers like Vultr,
+# never auto-removed) and SSHTarget (ephemeral SSH endpoints from providers like
+# RunPod/DO, safe to auto-clean when unreachable). Currently the pool bridge creates
+# ephemeral pod endpoints as type="baremetal", losing provenance. SSHTarget should
+# subclass BaremetalTarget so existing isinstance() checks still work. The `provider`
+# field is a stopgap until this split happens.
 @dataclass(frozen=True)
 class BaremetalTarget:
     """Configuration for baremetal GPU server.
@@ -59,6 +65,9 @@ class BaremetalTarget:
     gpu_type: str = "B200"
     compute_capability: str = "10.0"
     ncu_available: bool = True  # Baremetal typically has NCU
+    provider: str | None = (
+        None  # Source provider ("runpod", "digitalocean") — enables auto-cleanup when instance is gone
+    )
 
     # Docker execution config (Modal-like). If docker_image is set, run in container.
     docker_image: str | None = (
@@ -314,6 +323,7 @@ class RunPodTarget:
     # apt-get install --reinstall -y rocthrust
     # See docker/rocm7-runpod/README.md for details.
     image: str = "rocm/pytorch:rocm7.0.2_ubuntu24.04_py3.12_pytorch_release_2.7.1"
+    template_id: str | None = None  # RunPod template ID for custom pod configuration
 
     # RunPod template ID — required for non-RunPod images that need custom
     # dockerArgs (e.g. to install and start sshd). When set, takes priority
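For illustration, the shape of the split the TODO above describes; a hypothetical sketch, not code in this release:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class BaremetalTarget:
        """Persistent server (e.g. Vultr); never auto-removed."""

        host: str
        gpu_type: str = "B200"

    @dataclass(frozen=True)
    class SSHTarget(BaremetalTarget):
        """Ephemeral SSH endpoint from a pool provider; safe to auto-clean."""

        provider: str | None = None  # "runpod", "digitalocean", ...

    def maybe_auto_cleanup(target: BaremetalTarget) -> bool:
        # Existing isinstance(target, BaremetalTarget) checks keep working;
        # only the ephemeral subclass opts in to auto-cleanup.
        return isinstance(target, SSHTarget)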
{wafer_core-0.1.25.dist-info → wafer_core-0.1.26.dist-info}/RECORD
CHANGED
@@ -3,7 +3,7 @@ wafer_core/async_ssh.py,sha256=ocw2Gh5p8ltKeoqG_q32DXOBfu5q-IE7jCnzMbQN9WI,28713
 wafer_core/auth.py,sha256=JpUkZ3bROIsgexayak5TLiGqUAR5kqGjekwqQRvIXH0,7235
 wafer_core/gpu.py,sha256=ENa92btjXsx6ldpoyKfRrAmfy-LHG2KpA5k7SWd6Q_s,28627
 wafer_core/gpu_detect.py,sha256=kpD8Q_G6GA9j-WnnnTNA3BBPulkGcWnZiogOmjKDao0,13650
-wafer_core/problem_config.py,sha256=
+wafer_core/problem_config.py,sha256=IM4ZRul4306dF7yo8wwyxXYORUZ7nz5wnphG59HN6fo,10907
 wafer_core/remote_env.py,sha256=0ACTL-A_qn2B43qgQakqGaern-pspvwBGB9iebz199k,15354
 wafer_core/remote_jobs.py,sha256=7HdBDCigSxfp32BreWoljzG5xjK6fp25rwC_6D7D04s,8306
 wafer_core/retry.py,sha256=OIvSElJZbSm4-SFBpOFuYtoX2DWGiANomCmb3qxsirM,14821
@@ -318,6 +318,12 @@ wafer_core/lib/rocprofiler/systems/run/analyzer.py,sha256=Qg3M8-kCKdV82ehn6Ta20N
 wafer_core/lib/rocprofiler/systems/run/profiler.py,sha256=aiQLsDnfQHSeCM5zLnO4VlbTmREYnAtiuT50Eq6uWfg,8387
 wafer_core/lib/rocprofiler/systems/sample/__init__.py,sha256=31rNmLPQ7OVhvlOEEOwPKgk8_qrCidj6AmzDXexQJ_o,288
 wafer_core/lib/rocprofiler/systems/sample/profiler.py,sha256=CYZPTzNXd48LoCfmY6h_5RSYEdWYccuv3-t4YncHJLE,7384
+wafer_core/lib/trace_compare/__init__.py,sha256=G5vmiQnuweiF9vjK1FC4ZIy-tzuHiaLMs7QBnir8OJw,800
+wafer_core/lib/trace_compare/analyzer.py,sha256=o0SI1PsehpgxeUPQEB9708W_Q_ILiO5apgqVLe2xE8A,14541
+wafer_core/lib/trace_compare/classifier.py,sha256=sE1K007GVk_Up2g59SVUIZ7BThf0yHNjGsZ9AyM_Ah8,6028
+wafer_core/lib/trace_compare/formatter.py,sha256=GNrCZ45ueBN05CEXjOtTuKvTI8z-g-ZZFil-ni3sWVY,37962
+wafer_core/lib/trace_compare/fusion_analyzer.py,sha256=LwYTBjL_gHCvydfgFp-L9f_qfXq3GenJHRemygly4H8,36482
+wafer_core/lib/trace_compare/loader.py,sha256=E7-OS4uMqvJhGLyxKQNnAgK33YECrSjuCssUT_X0LQA,11728
 wafer_core/lib/tracelens/__init__.py,sha256=AkHdmOnKlBO4RpsAqVVGe7MOfv6E6uhEaC_iKrYeMPI,2002
 wafer_core/lib/tracelens/comparator.py,sha256=71YEPfjBi7_24u1oQuPerNtFsN0sDQ5CT_uBi0XLllw,3460
 wafer_core/lib/tracelens/finder.py,sha256=HpbN8TuRNbbBytPYOmkBkfsFVBReQqVgsvFX-mBrln4,2459
@@ -336,7 +342,7 @@ wafer_core/rollouts/agents.py,sha256=Uv1kjYogUfdPl18YfkVxVqFTbmWfuJQrxem_iHTUgdw
 wafer_core/rollouts/cli.py,sha256=2NqgegKdlmxD0eJzGOMB5o_1Hb5t7O5JpP_32uvF2BE,80117
 wafer_core/rollouts/cli_agents.py,sha256=e4qqqYBzWLsbw8FsNnddGApWp_on9Cvzrfd1amiAyvI,20641
 wafer_core/rollouts/deploy.py,sha256=3t88fM_BMyAPkxIl8pS4r5ogHJvrlqWQDuIaltDZBRc,40924
-wafer_core/rollouts/dtypes.py,sha256=
+wafer_core/rollouts/dtypes.py,sha256=oRWjpbUOTf4uyXvnO9QThcSzD1fBrDQnAfRhGbxdgrg,61916
 wafer_core/rollouts/eval_helpers.py,sha256=OE7uQZRcbqQhpFqb4zOj8zafc9Gr6xZJpSrMvxXKVUw,1699
 wafer_core/rollouts/evaluation.py,sha256=fk-pGZ5vpocVmw1iBbHtxMK0K6l8pYTLHCpDNvRY1Xo,69142
 wafer_core/rollouts/events.py,sha256=z85J8kq0LXPj5CiUk4RkiTQg--r9xiO7QeeJwkyUOto,7505
@@ -371,7 +377,7 @@ wafer_core/rollouts/agent_presets/gpt_5_1_codex_04_04.py,sha256=42NIBBYAnVoy5mbu
 wafer_core/rollouts/agent_presets/gpt_5_2_03_03.py,sha256=lEsHRUhhr8UbP5wSVKMOVDVOOtH_bQMRRgZ0dRGZMVc,1166
 wafer_core/rollouts/agent_presets/loader.py,sha256=WSkTbL7QhgMamZR5sXxep1n4cuy8LC3a4MN2phYTm-4,3666
 wafer_core/rollouts/agent_presets/opus_4_01_01.py,sha256=rurZMI-Df7O-Q-uVJj2zfY_DSjdNbMKBDZlRg9MLADc,3568
-wafer_core/rollouts/agent_presets/rlm_01_01.py,sha256=
+wafer_core/rollouts/agent_presets/rlm_01_01.py,sha256=jsjwDgACQxxJj4GYOUCcQvYjcICAaKV3eccQu9oyEcw,4781
 wafer_core/rollouts/agent_presets/sonnet_4_02_02.py,sha256=ZdHNxioki3wsfD6ficgB2r7HkgQDH_trCR-baGFgoHk,1269
 wafer_core/rollouts/agent_presets/sonnet_4_subagent_03_02.py,sha256=nxyjs4HWAPOAYLmPknSQr3viBXhboKC7wQ76LWB-jA0,2165
 wafer_core/rollouts/config/README.md,sha256=i0r0a3sKLkc1Eq3EqqR2Gahsgo-c8O3OZ0cCh7rp8Uw,9899
@@ -495,7 +501,7 @@ wafer_core/rollouts/prompt_optimization/adapters/system_prompt.py,sha256=CWFox1N
 wafer_core/rollouts/prompt_optimization/adapters/system_user_prompt.py,sha256=8JsSirihgZ5gacyYhn31GagyIxG0xQ7f7i4PnEupWz8,12090
 wafer_core/rollouts/prompt_optimization/adapters/terminal_bench.py,sha256=Etswuqf5dBIZQ2x2p26AXz4LT33YxT2qEeHqKXTJy18,12273
 wafer_core/rollouts/providers/__init__.py,sha256=Xu8PPDHOmF97ylMJXfE9JX2FJCanNVh7LXkHMmg0vWs,3121
-wafer_core/rollouts/providers/anthropic.py,sha256=
+wafer_core/rollouts/providers/anthropic.py,sha256=9x1GIL6JE8gutxVrLNiyAkymknIEKtl-98TnIUpFxoI,45223
 wafer_core/rollouts/providers/base.py,sha256=2ADu6pDz6yEcazo4j6-O12rs19bPewAfycjK_N03ZkY,14544
 wafer_core/rollouts/providers/google.py,sha256=IbqdXOpzSuMdI7eOZqRtzni85ysKby13PGe482Fq13w,22073
 wafer_core/rollouts/providers/openai_completions.py,sha256=3vUA74qjrxG-aOjyngtnZp0MzIhnzW5kudwxmOGxXfs,28820
@@ -655,7 +661,7 @@ wafer_core/utils/remote_execution.py,sha256=z7nLiOgmDiM_VmElLnT2LF-aKNeeKFYjXigT
 wafer_core/utils/submission_selection.py,sha256=LucdMTAbkqZA-GitSb3ZJ2pAeJ36wKqt5cTeS8xuAQ4,5655
 wafer_core/utils/kernel_utils/__init__.py,sha256=NsfKpbfpIsfupWIpIjWLGCjGAVqaONiwiWil5zXbrRc,2015
 wafer_core/utils/kernel_utils/backends.py,sha256=t3wY73Y-pVc_wALNu_bPsaFkqJ2dp2pf38KQ5ofP_go,1143
-wafer_core/utils/kernel_utils/defense.py,sha256=
+wafer_core/utils/kernel_utils/defense.py,sha256=8tHVTZlJfFcB_FWjNZfeGHwReSjG191OmFXtWXa07OM,20124
 wafer_core/utils/kernel_utils/deployment.py,sha256=-tMb3qWmAoXHWCmmT7SQBH7KBKyyLP0e5Dk6lOrTPW8,55957
 wafer_core/utils/kernel_utils/evaluate.py,sha256=1kxFNMl9VCXfKfk_BIiuA_zFfvDB1sl_feS2OEIJA1k,72346
 wafer_core/utils/kernel_utils/gpu_validation.py,sha256=LRiDjW_xAK4fXf1Vw1aYHG54B1W0J6b5L0K6PXzM2tI,3759
@@ -665,7 +671,7 @@ wafer_core/utils/kernel_utils/static_checker.py,sha256=XIQkzAOkGH5xtrOuZM4tNUqVJ
 wafer_core/utils/kernel_utils/task.py,sha256=XcmKxKUWh5It6nX3zGqj77tWgA32uPfQMqNOqyD5T48,2682
 wafer_core/utils/kernel_utils/utils.py,sha256=uDZoJDxh07hJeLNlPdKN2vgB15pqIr1LbXf0YIBHU4E,43056
 wafer_core/utils/kernel_utils/targets/__init__.py,sha256=4NwRLsuJ__S4xKAfda4Ag82C5MQ3Qio-4xA5S-mQGlU,2067
-wafer_core/utils/kernel_utils/targets/config.py,sha256=
+wafer_core/utils/kernel_utils/targets/config.py,sha256=V587DYkisEFoWwkmLQUW6I0mXkMEwA52sM7ZINslkK8,20625
 wafer_core/utils/kernel_utils/targets/execution.py,sha256=bZuNXCo0sIdD6hFhetLPrtDC-zMSiIsAx_aml49VVL0,15033
 wafer_core/utils/kernel_utils/targets/selection.py,sha256=5I_RG_7cfhq7uaeR28meC2EeNNKssFsK-Tc3QFG6Ze0,3590
 wafer_core/utils/modal_execution/__init__.py,sha256=jkVqYOLzCT5K73N9Od0UIUsx-99A0m6bpDrxfyXxQZ8,945
@@ -673,6 +679,6 @@ wafer_core/utils/modal_execution/modal_app.py,sha256=VfS2cX8gHtnlPXemmMcEwDPeQdh
 wafer_core/utils/modal_execution/modal_config.py,sha256=7cGX9TGqilQ3qxI3OFGXV5orjtyRU-PEDOJ4vP2oxno,4421
 wafer_core/utils/modal_execution/modal_execution.py,sha256=gChjnV6jqA3A7IRP3DfvV5cSfm_MN0X4f7JZufXgdZE,24594
 wafer_core/utils/modal_execution/test_modal.py,sha256=_jqou_hrLs1Daf1590Pnb0a_lXMMa2rczAPpW9HpoNQ,8153
-wafer_core-0.1.
-wafer_core-0.1.
-wafer_core-0.1.
+wafer_core-0.1.26.dist-info/METADATA,sha256=xzTIIcsmbJkA06hTdoRb4uXZj2ud1-wnV7EXdLOSOe4,1420
+wafer_core-0.1.26.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+wafer_core-0.1.26.dist-info/RECORD,,

{wafer_core-0.1.25.dist-info → wafer_core-0.1.26.dist-info}/WHEEL
File without changes