PyPI - wisent - Version diff - wisent-0.7.379-py3-none-any.whl → wisent-0.7.901-py3-none-any.whl - Mend

wisent: wisent-0.7.379-py3-none-any.whl → wisent-0.7.901-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1020)

wisent/core/contrastive_pairs/diagnostics/control_vectors.py (changed)

@@ -24,6 +24,17 @@ __all__ = [
     "GeometryAnalysisResult",
     "StructureType",
     "detect_geometry_structure",
+    "MultiLayerGeometryConfig",
+    "MultiLayerGeometryResult",
+    "LayerGeometryResult",
+    "detect_geometry_multi_layer",
+    "detect_geometry_all_layers",
+    "ExhaustiveCombinationResult",
+    "ExhaustiveGeometryAnalysisResult",
+    "detect_geometry_exhaustive",
+    "detect_geometry_limited",
+    "detect_geometry_contiguous",
+    "detect_geometry_smart",
 ]
@@ -84,7 +95,7 @@ def run_control_vector_diagnostics(
             )
             continue
-        flat = detached.to(dtype=torch.float32, device="cpu").reshape(-1)
+        flat = detached.to(device="cpu").reshape(-1)
         if not torch.isfinite(flat).all():
             non_finite = (~torch.isfinite(flat)).sum().item()
@@ -1058,66 +1069,88 @@ def _detect_cone_structure_score(
     neg_tensor: torch.Tensor,
     cfg: GeometryAnalysisConfig,
 ) -> StructureScore:
-    """Detect cone structure and return as StructureScore."""
-    cone_config = ConeAnalysisConfig(
-        num_directions=cfg.num_components,
-        optimization_steps=cfg.optimization_steps,
-        cone_threshold=cfg.cone_threshold,
-    )
+    """Detect cone structure using RAW cosine similarity of difference vectors.
+    A cone structure means:
+    - Multiple difference vectors (pos_i - neg_i) point in SIMILAR directions
+    - High cosine similarity between raw difference vectors
+    - NOT using gradient-optimized directions (which inflate the score)
+    This matches what the visualization computes.
+    """
     try:
-        result = check_cone_structure(pos_tensor, neg_tensor, cone_config)
-        # Cone is meaningful when:
-        # 1. Multiple directions are needed (PCA doesn't capture everything)
-        # 2. But directions are correlated (same half-space)
-        # 3. Cosine similarity is moderate (0.3-0.7 range ideal)
-        # Penalize if PCA already explains most variance (that's linear, not cone)
-        pca_penalty = result.pca_explained_variance  # High PCA = linear is enough
-        # Reward if cone explains more than PCA
-        cone_advantage = max(0, result.cone_explained_variance - result.pca_explained_variance)
-        # Cone needs moderate cosine similarity - not too high (= linear) not too low (= orthogonal)
-        cos_sim = result.avg_cosine_similarity
-        if cos_sim > 0.85:
-            # Very high similarity means directions are basically the same = linear
-            cosine_score = 0.3
-        elif cos_sim > 0.7:
-            cosine_score = 0.7
-        elif cos_sim > 0.3:
-            # Ideal range for cone
-            cosine_score = 1.0
+        # Compute raw difference vectors (what visualization uses)
+        n_pairs = min(pos_tensor.shape[0], neg_tensor.shape[0])
+        if n_pairs < 3:
+            return StructureScore(StructureType.CONE, 0.0, 0.0, {"reason": "insufficient_pairs"})
+        diff_vectors = pos_tensor[:n_pairs] - neg_tensor[:n_pairs]
+        # Normalize difference vectors
+        norms = diff_vectors.norm(dim=1, keepdim=True)
+        valid_mask = (norms.squeeze() > 1e-8)
+        if valid_mask.sum() < 3:
+            return StructureScore(StructureType.CONE, 0.0, 0.0, {"reason": "zero_differences"})
+        diff_normalized = diff_vectors[valid_mask] / norms[valid_mask]
+        # Compute pairwise cosine similarity matrix
+        cos_sim_matrix = diff_normalized @ diff_normalized.T
+        # Get off-diagonal elements (exclude self-similarity of 1.0)
+        n = cos_sim_matrix.shape[0]
+        mask = ~torch.eye(n, dtype=torch.bool, device=cos_sim_matrix.device)
+        off_diagonal = cos_sim_matrix[mask]
+        # Raw cosine similarity statistics
+        mean_cos_sim = float(off_diagonal.mean())
+        std_cos_sim = float(off_diagonal.std())
+        min_cos_sim = float(off_diagonal.min())
+        max_cos_sim = float(off_diagonal.max())
+        # Fraction of pairs with positive correlation (same half-space)
+        positive_fraction = float((off_diagonal > 0).float().mean())
+        # Fraction with strong correlation (>0.3)
+        strong_fraction = float((off_diagonal > 0.3).float().mean())
+        # Cone score based on raw cosine similarity:
+        # - High mean cosine = directions are aligned = cone
+        # - Low mean cosine = directions are independent = NOT cone
+        # - Negative mean cosine = directions are opposing = NOT cone
+        if mean_cos_sim < 0:
+            # Negative correlation = definitely not a cone
+            cone_score = 0.0
+        elif mean_cos_sim < 0.1:
+            # Near zero = orthogonal/independent, not cone
+            cone_score = mean_cos_sim  # 0.0 - 0.1
+        elif mean_cos_sim < 0.3:
+            # Weak correlation = weak cone
+            cone_score = 0.1 + 0.2 * ((mean_cos_sim - 0.1) / 0.2)  # 0.1 - 0.3
+        elif mean_cos_sim < 0.7:
+            # Moderate correlation = good cone (ideal range)
+            cone_score = 0.3 + 0.5 * ((mean_cos_sim - 0.3) / 0.4)  # 0.3 - 0.8
         else:
-            # Too different = not a cone
-            cosine_score = max(0, cos_sim / 0.3)
-        # Multiple significant directions needed
-        significant_dirs = sum(1 for s in result.separation_scores if abs(s) > 0.1)
-        multi_dir_score = min(significant_dirs / cfg.num_components, 1.0)
-        # Adjusted cone score
-        cone_score = (
-            0.25 * result.half_space_consistency +
-            0.25 * cosine_score +
-            0.20 * cone_advantage +
-            0.15 * multi_dir_score +
-            0.15 * (1 - pca_penalty)  # Penalize when PCA is sufficient
-        )
+            # Very high correlation = almost linear, still cone-like
+            cone_score = 0.8 + 0.2 * ((mean_cos_sim - 0.7) / 0.3)  # 0.8 - 1.0
+        # Confidence based on consistency (low std = more consistent = higher confidence)
+        consistency = max(0, 1 - std_cos_sim)
+        confidence = consistency * min(1.0, n_pairs / 20)
         return StructureScore(
             StructureType.CONE,
             score=float(cone_score),
-            confidence=result.half_space_consistency,
+            confidence=float(confidence),
             details={
-                "pca_explained": result.pca_explained_variance,
-                "cone_explained": result.cone_explained_variance,
-                "cone_advantage": float(cone_advantage),
-                "avg_cosine_similarity": result.avg_cosine_similarity,
-                "half_space_consistency": result.half_space_consistency,
-                "num_directions": result.num_directions_found,
-                "significant_directions": significant_dirs,
+                "raw_mean_cosine_similarity": mean_cos_sim,
+                "raw_std_cosine_similarity": std_cos_sim,
+                "raw_min_cosine_similarity": min_cos_sim,
+                "raw_max_cosine_similarity": max_cos_sim,
+                "positive_correlation_fraction": positive_fraction,
+                "strong_correlation_fraction": strong_fraction,
+                "n_valid_pairs": int(valid_mask.sum()),
             }
         )
     except Exception as e:
@@ -1130,7 +1163,17 @@ def _detect_cluster_structure(
     diff_vectors: torch.Tensor,
     cfg: GeometryAnalysisConfig,
 ) -> StructureScore:
-    """Detect if activations form discrete clusters."""
+    """Detect if activations form discrete clusters.
+    Cluster structure means:
+    - Data forms DISCRETE, SEPARATED groups
+    - Not just "pos vs neg" (that's trivially 2 clusters)
+    - Actual subgroups within the data
+    Key insight: k-means will ALWAYS find clusters.
+    We need high silhouette AND clear separation to claim clusters.
+    Also, if pos/neg perfectly separate, that's "linear", not "cluster".
+    """
     all_activations = torch.cat([pos_tensor, neg_tensor], dim=0)
     n_samples = all_activations.shape[0]
@@ -1143,7 +1186,6 @@ def _detect_cluster_structure(
     for k in range(2, min(cfg.max_clusters + 1, n_samples // 2)):
         try:
-            # Simple k-means implementation
             labels, centroids, silhouette = _kmeans_with_silhouette(all_activations, k, max_iters=50)
             silhouette_scores[k] = silhouette
@@ -1156,31 +1198,55 @@ def _detect_cluster_structure(
     if best_silhouette < 0:
         return StructureScore(StructureType.CLUSTER, 0.0, 0.0, {"reason": "clustering_failed"})
-    # Check if clusters separate pos/neg
+    # Check if clusters just separate pos/neg (that's linear, not cluster)
     labels, _, _ = _kmeans_with_silhouette(all_activations, best_k, max_iters=50)
     pos_labels = labels[:pos_tensor.shape[0]]
     neg_labels = labels[pos_tensor.shape[0]:]
-    # Cluster purity: do pos and neg end up in different clusters?
-    pos_majority = pos_labels.mode().values.item() if len(pos_labels) > 0 else -1
-    neg_majority = neg_labels.mode().values.item() if len(neg_labels) > 0 else -1
-    cluster_separation = 1.0 if pos_majority != neg_majority else 0.5
-    # Silhouette score ranges from -1 to 1, where:
-    # > 0.7 = strong structure
-    # 0.5-0.7 = reasonable structure
-    # 0.25-0.5 = weak structure
-    # < 0.25 = no substantial structure
-    # Only consider cluster structure if silhouette is reasonably high
-    if best_silhouette < cfg.cluster_silhouette_threshold:
-        # Low silhouette means no clear cluster structure
-        cluster_score = best_silhouette * 0.5  # Scale down significantly
+    # If k=2 and it perfectly separates pos/neg, that's LINEAR not cluster
+    if best_k == 2:
+        pos_mode = pos_labels.mode().values.item() if len(pos_labels) > 0 else -1
+        neg_mode = neg_labels.mode().values.item() if len(neg_labels) > 0 else -1
+        pos_purity = (pos_labels == pos_mode).float().mean()
+        neg_purity = (neg_labels == neg_mode).float().mean()
+        if pos_mode != neg_mode and pos_purity > 0.8 and neg_purity > 0.8:
+            # Perfect pos/neg separation - this is LINEAR, not cluster
+            return StructureScore(
+                StructureType.CLUSTER,
+                score=0.1,  # Low score - it's actually linear
+                confidence=0.8,
+                details={
+                    "reason": "pos_neg_separation_is_linear",
+                    "best_k": 2,
+                    "pos_purity": float(pos_purity),
+                    "neg_purity": float(neg_purity),
+                }
+            )
+    # For true cluster structure, we need:
+    # 1. High silhouette (> 0.5 is good, > 0.7 is strong)
+    # 2. k > 2 OR k=2 with mixed clusters
+    # Silhouette thresholds - be strict
+    if best_silhouette < 0.4:
+        # Very low silhouette = no clear cluster structure
+        cluster_score = best_silhouette * 0.3  # Very low score
+    elif best_silhouette < cfg.cluster_silhouette_threshold:
+        # Moderate silhouette = weak cluster structure
+        cluster_score = 0.1 + 0.2 * (best_silhouette / cfg.cluster_silhouette_threshold)
     else:
-        # Good silhouette - this is truly clustered data
-        # Normalize silhouette from [threshold, 1] to [0.5, 1]
-        normalized_silhouette = (best_silhouette - cfg.cluster_silhouette_threshold) / (1 - cfg.cluster_silhouette_threshold)
-        cluster_score = 0.5 + 0.4 * normalized_silhouette + 0.1 * cluster_separation
+        # High silhouette = good cluster structure
+        # But only if it's not just pos/neg separation
+        base_score = 0.3 + 0.5 * ((best_silhouette - cfg.cluster_silhouette_threshold) / (1 - cfg.cluster_silhouette_threshold))
+        # Bonus for k > 2 (more interesting structure)
+        if best_k > 2:
+            cluster_score = base_score + 0.2
+        else:
+            cluster_score = base_score
+    cluster_score = min(1.0, cluster_score)
     return StructureScore(
         StructureType.CLUSTER,
@@ -1190,7 +1256,6 @@ def _detect_cluster_structure(
             "best_k": best_k,
             "best_silhouette": float(best_silhouette),
             "all_silhouettes": {str(k): float(v) for k, v in silhouette_scores.items()},
-            "cluster_separation": float(cluster_separation),
             "silhouette_threshold": cfg.cluster_silhouette_threshold,
         }
     )
@@ -1279,7 +1344,19 @@ def _detect_manifold_structure(
     diff_vectors: torch.Tensor,
     cfg: GeometryAnalysisConfig,
 ) -> StructureScore:
-    """Detect non-linear manifold structure via intrinsic dimensionality."""
+    """Detect non-linear manifold structure.
+    Manifold structure means:
+    - Data lies on a CURVED surface (not linear)
+    - Linear methods (PCA, CAA) cannot capture the structure
+    - Requires non-linear methods (TITAN, neural steering)
+    Key insight: Manifold should be a FALLBACK, not default.
+    Only report manifold if:
+    1. Linear doesn't work (PCA explains little variance)
+    2. There's actual curvature (local neighborhoods don't align)
+    3. BUT there IS structure (not just noise)
+    """
     all_activations = torch.cat([pos_tensor, neg_tensor], dim=0)
     n_samples = all_activations.shape[0]
@@ -1287,52 +1364,80 @@ def _detect_manifold_structure(
         return StructureScore(StructureType.MANIFOLD, 0.0, 0.0, {"reason": "insufficient_data"})
     try:
-        # First check if there's meaningful separation
+        # 1. Check if linear works well (if yes, not manifold)
+        centered = all_activations - all_activations.mean(dim=0, keepdim=True)
+        try:
+            _, S, _ = torch.linalg.svd(centered, full_matrices=False)
+            total_var = (S ** 2).sum()
+            if total_var > 0:
+                # Top 2 PCs variance explained
+                top2_var = (S[:2] ** 2).sum() / total_var
+                linear_explains_well = float(top2_var) > 0.7
+            else:
+                linear_explains_well = True  # No variance = trivial
+        except Exception:
+            linear_explains_well = False
+            top2_var = torch.tensor(0.0)
+        if linear_explains_well:
+            # Linear works well - not a manifold (it's linear)
+            return StructureScore(
+                StructureType.MANIFOLD,
+                score=0.1,
+                confidence=0.8,
+                details={
+                    "reason": "linear_sufficient",
+                    "pca_top2_variance": float(top2_var),
+                }
+            )
+        # 2. Check for actual curvature (local PCA directions vary)
+        local_nonlinearity = _compute_local_nonlinearity(all_activations, cfg.manifold_neighbors)
+        # 3. Check if there's meaningful structure (separation between pos/neg)
         mean_diff = pos_tensor.mean(dim=0) - neg_tensor.mean(dim=0)
         separation_strength = mean_diff.norm() / (pos_tensor.std() + neg_tensor.std() + 1e-8)
         has_structure = min(float(separation_strength) / 2, 1.0)
-        if has_structure < 0.2:
-            # No meaningful separation - can't determine manifold structure
-            return StructureScore(StructureType.MANIFOLD, 0.1, 0.0, {"reason": "no_separation"})
-        # Estimate intrinsic dimensionality using correlation dimension
-        intrinsic_dim = _estimate_intrinsic_dimensionality(all_activations, cfg.manifold_neighbors)
-        # Compare to ambient dimension
-        ambient_dim = all_activations.shape[1]
-        dim_ratio = intrinsic_dim / ambient_dim
-        # Also compute local linearity deviation
-        local_nonlinearity = _compute_local_nonlinearity(all_activations, cfg.manifold_neighbors)
+        if has_structure < 0.3:
+            # No clear structure - likely noise, not manifold
+            return StructureScore(
+                StructureType.MANIFOLD,
+                score=0.2,
+                confidence=0.5,
+                details={
+                    "reason": "weak_structure",
+                    "separation_strength": float(separation_strength),
+                }
+            )
-        # Manifold score: high if low intrinsic dim AND non-linear AND has structure
-        # Low intrinsic dim alone could be linear, so we need nonlinearity
-        # But random noise also has "nonlinearity" - need to distinguish
+        # 4. Manifold requires BOTH:
+        #    - Linear doesn't work (already checked)
+        #    - AND there's curvature
+        #    - AND there's structure
-        # Manifold is meaningful only with significant dimension reduction
-        if dim_ratio > 0.5:
-            # Not much dimension reduction = not a clear manifold
-            manifold_score = 0.3 * has_structure
+        # If nonlinearity is low, it might be orthogonal/independent, not curved
+        if local_nonlinearity < 0.3:
+            manifold_score = 0.3 * has_structure  # Low score
         else:
+            # High nonlinearity + structure = manifold candidate
             manifold_score = (
-                0.30 * (1 - dim_ratio) +
-                0.25 * local_nonlinearity +
-                0.45 * has_structure  # Weight structure heavily
+                0.30 * local_nonlinearity +
+                0.30 * (1 - float(top2_var)) +  # Reward when linear fails
+                0.40 * has_structure
             )
-        # Confidence based on sample size
-        confidence = min(1.0, n_samples / 100)
+        # Confidence based on sample size and consistency
+        confidence = min(1.0, n_samples / 100) * has_structure
         return StructureScore(
             StructureType.MANIFOLD,
             score=float(manifold_score),
             confidence=float(confidence),
             details={
-                "intrinsic_dimensionality": float(intrinsic_dim),
-                "ambient_dimensionality": ambient_dim,
-                "dim_ratio": float(dim_ratio),
+                "pca_top2_variance": float(top2_var),
                 "local_nonlinearity": float(local_nonlinearity),
+                "separation_strength": float(separation_strength),
             }
         )
     except Exception as e:
@@ -1444,7 +1549,7 @@ def _detect_sparse_structure(
     sorted_abs = abs_diff.sort().values
     n = len(sorted_abs)
     cumsum = sorted_abs.cumsum(0)
-    gini = (2 * torch.arange(1, n + 1, dtype=torch.float32) @ sorted_abs - (n + 1) * sorted_abs.sum()) / (n * sorted_abs.sum() + 1e-10)
+    gini = (2 * torch.arange(1, n + 1, dtype=sorted_abs.dtype, device=sorted_abs.device) @ sorted_abs - (n + 1) * sorted_abs.sum()) / (n * sorted_abs.sum() + 1e-10)
     # Sparse score: high if few dimensions are active
     sparse_score = 0.4 * (1 - float(l1_l2_ratio)) + 0.3 * (1 - float(active_fraction)) + 0.3 * float(gini)
@@ -1527,11 +1632,11 @@ def _compute_dip_statistic(data: torch.Tensor) -> float:
         return 0.0
     # Empirical CDF
-    ecdf = torch.arange(1, n + 1, dtype=torch.float32) / n
+    ecdf = torch.arange(1, n + 1, dtype=sorted_data.dtype, device=sorted_data.device) / n
     # Greatest convex minorant and least concave majorant
     # Simplified: measure deviation from uniform
-    uniform = torch.linspace(0, 1, n)
+    uniform = torch.linspace(0, 1, n, dtype=sorted_data.dtype, device=sorted_data.device)
     # Kolmogorov-Smirnov like statistic
     ks_stat = (ecdf - uniform).abs().max()
@@ -1547,83 +1652,85 @@ def _detect_orthogonal_structure(
 ) -> StructureScore:
     """Detect if behavior is encoded in multiple orthogonal/independent subspaces.
-    Orthogonal structure means the data requires MULTIPLE independent directions
-    that are NOT correlated with each other. This is different from cone (where
-    directions are correlated) and linear (where one direction suffices).
-    """
-    if diff_vectors.shape[0] < cfg.num_components:
-        return StructureScore(StructureType.ORTHOGONAL, 0.0, 0.0, {"reason": "insufficient_data"})
+    Orthogonal structure means:
+    - Multiple difference vectors point in INDEPENDENT directions
+    - Low cosine similarity between difference vectors (near 0)
+    - NOT correlated (that's cone) and NOT single direction (that's linear)
+    This is the OPPOSITE of cone - if cosine sim is low, it's orthogonal.
+    Uses raw cosine similarity like the cone detector for consistency.
+    """
     try:
-        # PCA to understand variance distribution
-        centered = diff_vectors - diff_vectors.mean(dim=0, keepdim=True)
-        _, S, Vh = torch.linalg.svd(centered, full_matrices=False)
-        total_var = (S ** 2).sum()
-        if total_var < 1e-8:
-            return StructureScore(StructureType.ORTHOGONAL, 0.0, 0.0, {"reason": "no_variance"})
-        # For orthogonal structure:
-        # 1. Multiple components should have significant variance (not just one = linear)
-        # 2. Variance should be spread across multiple dimensions (not concentrated)
-        var_explained = (S ** 2) / total_var
-        k = min(cfg.num_components, len(S))
-        # First component dominance (low = more orthogonal/spread)
-        first_var = float(var_explained[0])
-        # Effective dimensionality (entropy-based)
-        var_explained_clipped = var_explained[var_explained > 1e-10]
-        entropy = -(var_explained_clipped * torch.log(var_explained_clipped + 1e-10)).sum()
-        max_entropy = torch.log(torch.tensor(float(len(var_explained_clipped))))
-        effective_dim_ratio = float(entropy / max_entropy) if max_entropy > 0 else 0.0
-        # Count significant dimensions (>5% variance each)
-        significant_dims = (var_explained > 0.05).sum().item()
-        multi_dim_score = min(significant_dims / 3, 1.0)  # 3+ significant dims is fully orthogonal
-        # Orthogonal structure is RARE and specific:
-        # It requires MULTIPLE INDEPENDENT directions with separation on EACH
-        # High spread alone is not orthogonal - it could be noise or cone
+        # Compute raw difference vectors (same as cone detector)
+        n_pairs = min(pos_tensor.shape[0], neg_tensor.shape[0])
+        if n_pairs < 3:
+            return StructureScore(StructureType.ORTHOGONAL, 0.0, 0.0, {"reason": "insufficient_pairs"})
+        diff_vectors_raw = pos_tensor[:n_pairs] - neg_tensor[:n_pairs]
+        # Normalize difference vectors
+        norms = diff_vectors_raw.norm(dim=1, keepdim=True)
+        valid_mask = (norms.squeeze() > 1e-8)
+        if valid_mask.sum() < 3:
+            return StructureScore(StructureType.ORTHOGONAL, 0.0, 0.0, {"reason": "zero_differences"})
+        diff_normalized = diff_vectors_raw[valid_mask] / norms[valid_mask]
+        # Compute pairwise cosine similarity matrix
+        cos_sim_matrix = diff_normalized @ diff_normalized.T
+        # Get off-diagonal elements
+        n = cos_sim_matrix.shape[0]
+        mask = ~torch.eye(n, dtype=torch.bool, device=cos_sim_matrix.device)
+        off_diagonal = cos_sim_matrix[mask]
+        # Raw cosine similarity statistics
+        mean_cos_sim = float(off_diagonal.mean())
+        std_cos_sim = float(off_diagonal.std())
+        abs_mean_cos_sim = float(off_diagonal.abs().mean())
+        # Fraction near zero (truly orthogonal)
+        near_zero_fraction = float((off_diagonal.abs() < 0.2).float().mean())
+        # Orthogonal = LOW cosine similarity (opposite of cone)
+        # Ideal orthogonal: mean cosine sim near 0, low absolute mean
+        if abs_mean_cos_sim < 0.1:
+            # Very low correlation = strong orthogonal
+            orthogonal_score = 0.8 + 0.2 * (1 - abs_mean_cos_sim / 0.1)
+        elif abs_mean_cos_sim < 0.2:
+            # Low correlation = moderate orthogonal
+            orthogonal_score = 0.5 + 0.3 * (1 - (abs_mean_cos_sim - 0.1) / 0.1)
+        elif abs_mean_cos_sim < 0.4:
+            # Moderate correlation = weak orthogonal
+            orthogonal_score = 0.2 + 0.3 * (1 - (abs_mean_cos_sim - 0.2) / 0.2)
+        else:
+            # High correlation = not orthogonal (probably cone or linear)
+            orthogonal_score = max(0, 0.2 * (1 - (abs_mean_cos_sim - 0.4) / 0.6))
-        # Check separation strength
+        # Check if there's meaningful separation (not just noise)
         mean_diff = pos_tensor.mean(dim=0) - neg_tensor.mean(dim=0)
         separation_strength = mean_diff.norm() / (pos_tensor.std() + neg_tensor.std() + 1e-8)
-        has_separation = min(float(separation_strength) / 3, 1.0)
-        # For true orthogonal structure, we need:
-        # 1. Strong separation (otherwise no structure)
-        # 2. Multiple significant dimensions (otherwise linear)
-        # 3. But NOT too spread (otherwise just noise)
-        # Sweet spot: 2-4 significant dimensions with clear separation
-        if significant_dims < 2:
-            # Too few dimensions = linear
-            orthogonal_score = 0.2
-        elif significant_dims > 10:
-            # Too many = likely noise, not structure
-            orthogonal_score = 0.3 * has_separation
-        else:
-            # Reasonable number of dimensions
-            # Check if it's not dominated by first (would be linear)
-            # and not too spread (would be noise)
-            structure_score = (
-                0.3 * (1 - first_var) +  # Not dominated by one direction
-                0.3 * min(significant_dims / 4, 1.0) +  # 2-4 directions is ideal
-                0.4 * has_separation  # Must have separation
-            )
-            orthogonal_score = structure_score * 0.8  # Scale down - orthogonal is rare
+        has_separation = min(float(separation_strength) / 2, 1.0)
+        # Orthogonal without separation is just noise
+        if has_separation < 0.3:
+            orthogonal_score *= 0.3  # Heavy penalty
+        # Confidence based on consistency and sample size
+        confidence = near_zero_fraction * min(1.0, n_pairs / 20)
         return StructureScore(
             StructureType.ORTHOGONAL,
             score=float(orthogonal_score),
-            confidence=min(1.0, diff_vectors.shape[0] / 30),
+            confidence=float(confidence),
             details={
-                "first_component_variance": float(first_var),
-                "effective_dim_ratio": float(effective_dim_ratio),
-                "significant_dimensions": int(significant_dims),
-                "top_5_variances": var_explained[:min(5, len(var_explained))].tolist(),
+                "raw_mean_cosine_similarity": mean_cos_sim,
+                "raw_abs_mean_cosine_similarity": abs_mean_cos_sim,
+                "raw_std_cosine_similarity": std_cos_sim,
+                "near_zero_fraction": near_zero_fraction,
+                "separation_strength": float(separation_strength),
+                "n_valid_pairs": int(valid_mask.sum()),
             }
         )
     except Exception as e:
@@ -1652,4 +1759,1302 @@ def _generate_recommendation(best_structure: StructureType, all_scores: Dict[str
         if second_best[1].score > 0.6:
             base_rec += f" (Also consider {second_best[0]}: score {second_best[1].score:.2f})"
-    return base_rec
+    return base_rec
+# =============================================================================
+# Multi-Layer Geometry Analysis
+# =============================================================================
+@dataclass
+class MultiLayerGeometryConfig:
+    """Configuration for multi-layer geometry analysis.
+    Consumed by ``detect_geometry_multi_layer``: the ``analyze_*`` flags switch
+    individual families of layer combinations on or off, the remaining fields
+    control how layers are merged and how much work each analysis does.
+    """
+    # Forwarded as GeometryAnalysisConfig(num_components=...) for every analysis run.
+    num_components: int = 5
+    # Forwarded as GeometryAnalysisConfig(optimization_steps=...).
+    optimization_steps: int = 50
+    # Passed to _combine_layer_activations whenever more than one layer is merged.
+    combination_method: str = "concat"  # "concat", "mean", "weighted"
+    # Run the analysis on every layer on its own (also feeds structure_by_depth).
+    analyze_per_layer: bool = True
+    # Run the analysis on all layers merged together (needs at least 2 layers).
+    analyze_combined: bool = True
+    analyze_subsets: bool = True  # early/middle/late
+    analyze_pairs: bool = True  # all pairs of layers
+    analyze_adjacent: bool = True  # adjacent layer pairs
+    analyze_skip: bool = True  # every other layer, every third, etc.
+    # Layer indices not present in the input are dropped from each custom combination.
+    analyze_custom: Optional[List[List[int]]] = None  # custom layer combinations
+    # Pairs are enumerated in itertools.combinations order and cut off at this count.
+    max_pair_combinations: int = 50  # limit number of pair combinations to analyze
+@dataclass
+class LayerGeometryResult:
+    """Geometry result for a single layer.
+    A compact summary of one ``detect_geometry_structure`` run, as stored in
+    ``MultiLayerGeometryResult.per_layer_results``.
+    """
+    # Layer index the activations were taken from.
+    layer: int
+    # Structure type reported as best for this layer.
+    best_structure: StructureType
+    # Score of that best structure.
+    best_score: float
+    # Score of every evaluated structure, keyed by structure name.
+    all_scores: Dict[str, float]
+@dataclass
+class MultiLayerGeometryResult:
+    """Results from multi-layer geometry analysis.
+    Returned by ``detect_geometry_multi_layer``. The ``*_results`` dictionaries
+    hold one full ``GeometryAnalysisResult`` per analysed layer combination and
+    stay empty when the corresponding family is disabled in the config or when
+    too few layers are available. The ``best_*`` fields and
+    ``all_combinations_ranked`` summarise those dictionaries.
+    """
+    per_layer_results: Dict[int, LayerGeometryResult]
+    """Geometry analysis for each individual layer."""
+    combined_result: Optional[GeometryAnalysisResult]
+    """Geometry analysis for all layers combined."""
+    layer_subset_results: Dict[str, GeometryAnalysisResult]
+    """Geometry analysis for layer subsets (e.g., 'early', 'middle', 'late')."""
+    layer_pair_results: Dict[str, GeometryAnalysisResult]
+    """Geometry analysis for pairs of layers (e.g., 'L1+L5', 'L2+L8')."""
+    adjacent_pair_results: Dict[str, GeometryAnalysisResult]
+    """Geometry analysis for adjacent layer pairs (e.g., 'L1+L2', 'L2+L3')."""
+    skip_results: Dict[str, GeometryAnalysisResult]
+    """Geometry analysis for skip patterns (e.g., 'every_2nd', 'every_3rd')."""
+    custom_results: Dict[str, GeometryAnalysisResult]
+    """Geometry analysis for custom layer combinations."""
+    best_single_layer: int
+    """Layer with strongest structure detection."""
+    best_single_layer_structure: StructureType
+    """Structure type detected at best single layer."""
+    best_single_layer_score: float
+    """Score at best single layer."""
+    # None when no combination scored strictly higher than the best single layer.
+    best_combination: Optional[str]
+    """Best performing layer combination (if better than single layer)."""
+    # Falls back to the best single-layer score when best_combination is None.
+    best_combination_score: float
+    """Score of best combination."""
+    best_combination_structure: Optional[StructureType]
+    """Structure type detected at best combination."""
+    # Human-readable sentence; the comparison uses a 0.1 score margin.
+    combined_vs_single: str
+    """Whether combined layers improve over single layer."""
+    layer_agreement: float
+    """How much layers agree on structure type (0-1)."""
+    # One list per structure name, with one entry per analysed layer in ascending layer order.
+    structure_by_depth: Dict[str, List[float]]
+    """How each structure score varies by layer depth."""
+    # Includes the single-layer entries (named "L<idx>") alongside real combinations.
+    all_combinations_ranked: List[Tuple[str, float, StructureType]]
+    """All combinations ranked by score: (name, score, structure)."""
+    recommendation: str
+    """Recommendation based on multi-layer analysis."""
+def detect_geometry_multi_layer(
+    pos_activations_by_layer: Dict[int, torch.Tensor],
+    neg_activations_by_layer: Dict[int, torch.Tensor],
+    config: MultiLayerGeometryConfig | None = None,
+) -> MultiLayerGeometryResult:
+    """
+    Detect geometric structure across multiple layers.
+    Analyzes:
+    1. Each layer individually
+    2. All layers combined (concatenated or aggregated)
+    3. Layer subsets (early, middle, late, first_half, second_half)
+    4. Layer pairs (all combinations of 2 layers, capped by max_pair_combinations)
+    5. Adjacent layer pairs (L1+L2, L2+L3, etc.)
+    6. Skip patterns (every 2nd, every 3rd layer, first+last, first+mid+last)
+    7. Custom layer combinations
+    8. How structure varies by depth
+    Only layers present in BOTH input dictionaries are analysed; a layer that
+    exists on one side only has no contrastive counterpart and is ignored.
+    Arguments:
+        pos_activations_by_layer: Dict mapping layer index to positive activations [N, hidden_dim]
+        neg_activations_by_layer: Dict mapping layer index to negative activations [N, hidden_dim]
+        config: Analysis configuration (defaults to MultiLayerGeometryConfig())
+    Returns:
+        MultiLayerGeometryResult with comprehensive multi-layer analysis
+    Raises:
+        ValueError: If no layer is available on both sides.
+    """
+    from collections import Counter
+    from itertools import combinations, islice
+    cfg = config or MultiLayerGeometryConfig()
+    geo_cfg = GeometryAnalysisConfig(num_components=cfg.num_components, optimization_steps=cfg.optimization_steps)
+    # Restrict to layers that have both sides; previously a positive-only layer
+    # crashed later with a bare KeyError on the negative dictionary.
+    layers = sorted(l for l in pos_activations_by_layer if l in neg_activations_by_layer)
+    if not layers:
+        raise ValueError("No layers provided")
+    n_layers = len(layers)
+    known_layers = set(layers)
+    # Every analysed combination (including single layers) ends up here for ranking.
+    all_combo_results: Dict[str, GeometryAnalysisResult] = {}
+    def _analyze_combo(name: str, combo_layers: List[int], bucket: Dict[str, GeometryAnalysisResult]) -> None:
+        """Merge the given layers, run structure detection and record the result."""
+        merged_pos, merged_neg = _combine_layer_activations(
+            pos_activations_by_layer, neg_activations_by_layer, list(combo_layers), cfg.combination_method
+        )
+        outcome = detect_geometry_structure(merged_pos, merged_neg, geo_cfg)
+        bucket[name] = outcome
+        all_combo_results[name] = outcome
+    # 1. Analyze each layer individually
+    per_layer_results: Dict[int, LayerGeometryResult] = {}
+    structure_by_depth: Dict[str, List[float]] = {
+        "linear": [], "cone": [], "cluster": [], "manifold": [],
+        "sparse": [], "bimodal": [], "orthogonal": []
+    }
+    if cfg.analyze_per_layer:
+        for layer in layers:
+            result = detect_geometry_structure(
+                pos_activations_by_layer[layer], neg_activations_by_layer[layer], geo_cfg
+            )
+            all_scores = {name: score.score for name, score in result.all_scores.items()}
+            per_layer_results[layer] = LayerGeometryResult(
+                layer=layer,
+                best_structure=result.best_structure,
+                best_score=result.best_score,
+                all_scores=all_scores,
+            )
+            all_combo_results[f"L{layer}"] = result
+            for struct_name, score in all_scores.items():
+                if struct_name in structure_by_depth:
+                    structure_by_depth[struct_name].append(score)
+    # 2. Find best single layer (first layer wins ties, as max() keeps the first maximum)
+    if per_layer_results:
+        best_layer_result = max(per_layer_results.values(), key=lambda r: r.best_score)
+        best_single_layer = best_layer_result.layer
+        best_single_layer_structure = best_layer_result.best_structure
+        best_single_layer_score = best_layer_result.best_score
+    else:
+        best_single_layer = layers[0]
+        best_single_layer_structure = StructureType.UNKNOWN
+        best_single_layer_score = 0.0
+    # 3. Analyze all layers combined
+    combined_result = None
+    if cfg.analyze_combined and n_layers > 1:
+        combined_bucket: Dict[str, GeometryAnalysisResult] = {}
+        _analyze_combo("all_layers", layers, combined_bucket)
+        combined_result = combined_bucket["all_layers"]
+    # 4. Analyze layer subsets (early, middle, late, plus both halves).
+    # With at least 3 layers every slice below is guaranteed to be non-empty.
+    layer_subset_results: Dict[str, GeometryAnalysisResult] = {}
+    if cfg.analyze_subsets and n_layers >= 3:
+        third = n_layers // 3
+        half = n_layers // 2
+        subsets = [
+            ("early", layers[:third]),
+            ("middle", layers[third:2 * third]),
+            ("late", layers[2 * third:]),
+            ("first_half", layers[:half]),
+            ("second_half", layers[half:]),
+        ]
+        for subset_name, subset_layers in subsets:
+            _analyze_combo(subset_name, subset_layers, layer_subset_results)
+    # 5. Analyze layer pairs (first max_pair_combinations pairs in combinations order)
+    layer_pair_results: Dict[str, GeometryAnalysisResult] = {}
+    if cfg.analyze_pairs and n_layers >= 2:
+        pair_budget = max(cfg.max_pair_combinations, 0)
+        for l1, l2 in islice(combinations(layers, 2), pair_budget):
+            _analyze_combo(f"L{l1}+L{l2}", [l1, l2], layer_pair_results)
+    # 6. Analyze adjacent layer pairs
+    adjacent_pair_results: Dict[str, GeometryAnalysisResult] = {}
+    if cfg.analyze_adjacent and n_layers >= 2:
+        for l1, l2 in zip(layers, layers[1:]):
+            _analyze_combo(f"adj_L{l1}+L{l2}", [l1, l2], adjacent_pair_results)
+    # 7. Analyze skip patterns (needs at least 4 layers, so each pattern has >= 2 members)
+    skip_results: Dict[str, GeometryAnalysisResult] = {}
+    if cfg.analyze_skip and n_layers >= 4:
+        _analyze_combo("every_2nd", layers[::2], skip_results)
+        if n_layers >= 6:
+            _analyze_combo("every_3rd", layers[::3], skip_results)
+        _analyze_combo("first_last", [layers[0], layers[-1]], skip_results)
+        _analyze_combo("first_mid_last", [layers[0], layers[n_layers // 2], layers[-1]], skip_results)
+    # 8. Analyze custom combinations (unknown layer indices are dropped)
+    custom_results: Dict[str, GeometryAnalysisResult] = {}
+    if cfg.analyze_custom:
+        for i, custom_layers in enumerate(cfg.analyze_custom):
+            valid_layers = [l for l in custom_layers if l in known_layers]
+            if valid_layers:
+                custom_name = f"custom_{i}_L" + "+L".join(map(str, valid_layers))
+                _analyze_combo(custom_name, valid_layers, custom_results)
+    # 9. Compute layer agreement: share of layers voting for the most common structure
+    if per_layer_results:
+        votes = Counter(r.best_structure for r in per_layer_results.values())
+        layer_agreement = max(votes.values()) / len(per_layer_results)
+    else:
+        layer_agreement = 0.0
+    # 10. Rank all combinations and find best (stable sort keeps insertion order on ties)
+    all_combinations_ranked = sorted(
+        [(name, r.best_score, r.best_structure) for name, r in all_combo_results.items()],
+        key=lambda x: x[1],
+        reverse=True
+    )
+    # Default to the single-layer winner; only a strictly better entry replaces it.
+    best_combination = None
+    best_combination_score = best_single_layer_score
+    best_combination_structure = best_single_layer_structure
+    if all_combinations_ranked:
+        top_name, top_score, top_structure = all_combinations_ranked[0]
+        if top_score > best_single_layer_score:
+            best_combination = top_name
+            best_combination_score = top_score
+            best_combination_structure = top_structure
+    # 11. Compare combined vs single (0.1 margin before declaring a winner)
+    if combined_result and per_layer_results:
+        if combined_result.best_score > best_single_layer_score + 0.1:
+            combined_vs_single = f"Combined ({combined_result.best_score:.2f}) better than single layer ({best_single_layer_score:.2f})"
+        elif best_single_layer_score > combined_result.best_score + 0.1:
+            combined_vs_single = f"Single layer {best_single_layer} ({best_single_layer_score:.2f}) better than combined ({combined_result.best_score:.2f})"
+        else:
+            combined_vs_single = f"Similar performance: combined={combined_result.best_score:.2f}, single={best_single_layer_score:.2f}"
+    else:
+        combined_vs_single = "No comparison available"
+    # 12. Generate recommendation
+    recommendation = _generate_multi_layer_recommendation_v2(
+        per_layer_results, combined_result, layer_subset_results,
+        layer_pair_results, skip_results,
+        best_single_layer, best_single_layer_structure, best_single_layer_score,
+        best_combination, best_combination_score, best_combination_structure,
+        layer_agreement, all_combinations_ranked
+    )
+    return MultiLayerGeometryResult(
+        per_layer_results=per_layer_results,
+        combined_result=combined_result,
+        layer_subset_results=layer_subset_results,
+        layer_pair_results=layer_pair_results,
+        adjacent_pair_results=adjacent_pair_results,
+        skip_results=skip_results,
+        custom_results=custom_results,
+        best_single_layer=best_single_layer,
+        best_single_layer_structure=best_single_layer_structure,
+        best_single_layer_score=best_single_layer_score,
+        best_combination=best_combination,
+        best_combination_score=best_combination_score,
+        best_combination_structure=best_combination_structure,
+        combined_vs_single=combined_vs_single,
+        layer_agreement=layer_agreement,
+        structure_by_depth=structure_by_depth,
+        all_combinations_ranked=all_combinations_ranked,
+        recommendation=recommendation,
+    )
+def _combine_layer_activations(
+    pos_by_layer: Dict[int, torch.Tensor],
+    neg_by_layer: Dict[int, torch.Tensor],
+    layers: List[int],
+    method: str = "concat",
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """Combine activations from multiple layers.
+    Only layers that have BOTH a positive and a negative tensor are used, so the
+    two returned tensors are always built from the same layers in the same
+    order (a layer present on one side only is skipped on both sides).
+    Arguments:
+        pos_by_layer: Dict mapping layer index to positive activations
+        neg_by_layer: Dict mapping layer index to negative activations
+        layers: Layer indices to combine, in the order they should be merged
+        method: "concat" (join along the last dim), "mean" (element-wise average)
+            or "weighted" (element-wise average with weights rising linearly
+            from 0.5 to 1.5 over the given layer order, normalised to sum to 1)
+    Returns:
+        Tuple (combined_pos, combined_neg)
+    Raises:
+        ValueError: If no requested layer exists on both sides, or the method is unknown.
+    """
+    # Filter once against both dictionaries; filtering each side separately let
+    # the two lists drift apart (different feature dims / misaligned weights).
+    paired_layers = [l for l in layers if l in pos_by_layer and l in neg_by_layer]
+    if not paired_layers:
+        raise ValueError("No activations found for specified layers")
+    pos_acts = [pos_by_layer[l] for l in paired_layers]
+    neg_acts = [neg_by_layer[l] for l in paired_layers]
+    if method == "concat":
+        return torch.cat(pos_acts, dim=-1), torch.cat(neg_acts, dim=-1)
+    if method == "mean":
+        return torch.stack(pos_acts, dim=0).mean(dim=0), torch.stack(neg_acts, dim=0).mean(dim=0)
+    if method == "weighted":
+        weights = torch.linspace(0.5, 1.5, len(paired_layers))
+        weights = weights / weights.sum()
+        combined_pos = sum(w * a for w, a in zip(weights, pos_acts))
+        combined_neg = sum(w * a for w, a in zip(weights, neg_acts))
+        return combined_pos, combined_neg
+    raise ValueError(f"Unknown combination method: {method}")
+def _generate_multi_layer_recommendation(
+    per_layer_results: Dict[int, LayerGeometryResult],
+    combined_result: Optional[GeometryAnalysisResult],
+    layer_subset_results: Dict[str, GeometryAnalysisResult],
+    best_single_layer: int,
+    best_single_layer_structure: StructureType,
+    best_single_layer_score: float,
+    layer_agreement: float,
+) -> str:
+    """Build a short, space-joined recommendation text from multi-layer results.
+    The text always names the best single layer; sentences about layer
+    agreement, the all-layers combination and the strongest layer subset are
+    added only when the respective condition holds. ``per_layer_results`` is
+    accepted for signature compatibility but not read.
+    """
+    # Agreement is only mentioned at the extremes (> 80% or < 40%).
+    if layer_agreement > 0.8:
+        sentences = [f"High layer agreement ({layer_agreement:.0%}): structure is consistent across depth."]
+    elif layer_agreement < 0.4:
+        sentences = [f"Low layer agreement ({layer_agreement:.0%}): different structures at different depths."]
+    else:
+        sentences = []
+    sentences.append(
+        f"Best single layer: {best_single_layer} with {best_single_layer_structure.value} ({best_single_layer_score:.2f})."
+    )
+    # The combined result must beat the single layer by more than 0.1 to be preferred.
+    if combined_result:
+        sentences.append(
+            f"Combined layers improve detection ({combined_result.best_score:.2f} vs {best_single_layer_score:.2f}). Use multi-layer steering."
+            if combined_result.best_score > best_single_layer_score + 0.1
+            else f"Single layer is sufficient. Target layer {best_single_layer}."
+        )
+    # Mention the strongest subset only when it outscores the best single layer.
+    if layer_subset_results:
+        top_subset, top_subset_result = max(layer_subset_results.items(), key=lambda item: item[1].best_score)
+        top_subset_score = top_subset_result.best_score
+        if top_subset_score > best_single_layer_score:
+            sentences.append(f"'{top_subset}' layers show strongest structure ({top_subset_score:.2f}).")
+    return " ".join(sentences)
+def _generate_multi_layer_recommendation_v2(
+    per_layer_results: Dict[int, LayerGeometryResult],
+    combined_result: Optional[GeometryAnalysisResult],
+    layer_subset_results: Dict[str, GeometryAnalysisResult],
+    layer_pair_results: Dict[str, GeometryAnalysisResult],
+    skip_results: Dict[str, GeometryAnalysisResult],
+    best_single_layer: int,
+    best_single_layer_structure: StructureType,
+    best_single_layer_score: float,
+    best_combination: Optional[str],
+    best_combination_score: float,
+    best_combination_structure: Optional[StructureType],
+    layer_agreement: float,
+    all_combinations_ranked: List[Tuple[str, float, StructureType]],
+) -> str:
+    """Compose the space-joined recommendation text for a multi-layer analysis.
+    Sentences are emitted in a fixed order: layer agreement, overall best
+    (combination vs. single layer), top-3 ranking, best skip pattern, best layer
+    pair, and the early-vs-late depth trend. ``combined_result`` and
+    ``layer_subset_results`` are accepted but not read here.
+    """
+    def _strongest(results):
+        """Return (name, score) of the highest-scoring entry; first entry wins ties."""
+        winner, winner_result = max(results.items(), key=lambda item: item[1].best_score)
+        return winner, winner_result.best_score
+    # Layer agreement: always exactly one sentence.
+    if layer_agreement > 0.8:
+        agreement_line = f"High layer agreement ({layer_agreement:.0%}): consistent structure across depth."
+    elif layer_agreement < 0.4:
+        agreement_line = f"Low layer agreement ({layer_agreement:.0%}): structure varies by depth."
+    else:
+        agreement_line = f"Moderate layer agreement ({layer_agreement:.0%})."
+    lines = [agreement_line]
+    # A combination is only promoted when it beats the single layer by more than 0.05.
+    if best_combination and best_combination_score > best_single_layer_score + 0.05:
+        gain = best_combination_score - best_single_layer_score
+        lines.append(
+            f"BEST: '{best_combination}' ({best_combination_structure.value}: {best_combination_score:.2f}) "
+            f"outperforms single layer {best_single_layer} by {gain:.2f}."
+        )
+    else:
+        lines.append(
+            f"BEST: Layer {best_single_layer} ({best_single_layer_structure.value}: {best_single_layer_score:.2f}). "
+            f"Multi-layer combinations don't improve detection."
+        )
+    # Top-3 summary is skipped entirely when fewer than three entries were ranked.
+    podium = all_combinations_ranked[:3]
+    if len(podium) == 3:
+        podium_text = ", ".join(f"{name}={score:.2f}" for name, score, _ in podium)
+        lines.append(f"Top 3: {podium_text}.")
+    # Pattern-specific hints, each only when it beats the best single layer.
+    if skip_results:
+        skip_name, skip_score = _strongest(skip_results)
+        if skip_score > best_single_layer_score:
+            lines.append(f"Skip pattern '{skip_name}' is effective ({skip_score:.2f}).")
+    if layer_pair_results:
+        pair_name, pair_score = _strongest(layer_pair_results)
+        if pair_score > best_single_layer_score:
+            lines.append(f"Layer pair '{pair_name}' shows synergy ({pair_score:.2f}).")
+    # Depth trend: compare the shallowest and deepest analysed layer (0.2 margin).
+    if per_layer_results and len(per_layer_results) >= 3:
+        shallow_score = per_layer_results[min(per_layer_results)].best_score
+        deep_score = per_layer_results[max(per_layer_results)].best_score
+        if deep_score > shallow_score + 0.2:
+            lines.append("Later layers show stronger structure than early layers.")
+        elif shallow_score > deep_score + 0.2:
+            lines.append("Early layers show stronger structure than later layers.")
+    return " ".join(lines)
+def detect_geometry_all_layers(
+    pairs_with_activations: List,
+    layers: Optional[List[int]] = None,
+    config: MultiLayerGeometryConfig | None = None,
+) -> MultiLayerGeometryResult:
+    """
+    Convenience function to detect geometry from pairs with pre-collected activations.
+    For every layer, a pair contributes one row to the positive tensor and one
+    row to the negative tensor only when BOTH of its responses carry a non-None
+    activation for that layer. This keeps row i of the positive tensor and row i
+    of the negative tensor tied to the same pair.
+    Arguments:
+        pairs_with_activations: List of ContrastivePair objects with layers_activations populated
+        layers: Specific layers to analyze (None = all available)
+        config: Analysis configuration
+    Returns:
+        MultiLayerGeometryResult
+    Raises:
+        ValueError: If no pairs are given (or, via detect_geometry_multi_layer,
+            if no layer ends up with usable activations).
+    """
+    if not pairs_with_activations:
+        raise ValueError("No pairs provided")
+    # Set lookup instead of scanning the list once per activation.
+    wanted_layers = None if layers is None else set(layers)
+    # Extract activations by layer, keeping both sides in lock-step
+    pos_by_layer: Dict[int, List[torch.Tensor]] = {}
+    neg_by_layer: Dict[int, List[torch.Tensor]] = {}
+    for pair in pairs_with_activations:
+        pos_acts = pair.positive_response.layers_activations
+        neg_acts = pair.negative_response.layers_activations
+        # Layer keys may be strings, so normalise them to int before matching sides.
+        neg_lookup = {int(layer_key): act for layer_key, act in neg_acts.items()}
+        for layer_key, pos_act in pos_acts.items():
+            layer = int(layer_key)
+            if wanted_layers is not None and layer not in wanted_layers:
+                continue
+            neg_act = neg_lookup.get(layer)
+            # Collecting the sides independently used to misalign (or unbalance)
+            # the stacked tensors whenever one response lacked this layer.
+            if pos_act is None or neg_act is None:
+                continue
+            pos_by_layer.setdefault(layer, []).append(pos_act.float())
+            neg_by_layer.setdefault(layer, []).append(neg_act.float())
+    # Stack into tensors; every collected layer has the same number of rows on both sides
+    pos_tensors = {layer: torch.stack(acts) for layer, acts in pos_by_layer.items()}
+    neg_tensors = {layer: torch.stack(acts) for layer, acts in neg_by_layer.items()}
+    return detect_geometry_multi_layer(pos_tensors, neg_tensors, config)
+@dataclass
+class ExhaustiveCombinationResult:
+    """Result for a single layer combination.
+    One instance is created per tested combination in
+    ``detect_geometry_exhaustive`` from the output of ``detect_geometry_structure``.
+    """
+    # Layer indices that were combined (a 1-tuple for a single layer).
+    layers: Tuple[int, ...]
+    # Structure type reported as best for this combination.
+    best_structure: StructureType
+    # Score of that best structure.
+    best_score: float
+    # Score of every evaluated structure, keyed by structure name.
+    all_scores: Dict[str, float]
+@dataclass
+class ExhaustiveGeometryAnalysisResult:
+    """Results from exhaustive layer combination analysis.
+    Returned by ``detect_geometry_exhaustive``; summarises the best layer
+    combinations found and how they compare with the best single layer.
+    """
+    total_combinations: int
+    """Total number of combinations tested."""
+    # NOTE: detect_geometry_exhaustive only retains its top_k best results here,
+    # not one entry per tested combination.
+    all_results: List[ExhaustiveCombinationResult]
+    """All results, sorted by best_score descending."""
+    best_combination: Tuple[int, ...]
+    """Layer combination with highest score."""
+    best_score: float
+    """Highest score achieved."""
+    best_structure: StructureType
+    """Structure type at best combination."""
+    # The first ten entries of all_results (fewer if fewer results exist).
+    top_10: List[ExhaustiveCombinationResult]
+    """Top 10 combinations."""
+    single_layer_best: int
+    """Best single layer."""
+    single_layer_best_score: float
+    """Score of best single layer."""
+    # Computed as best_score > single_layer_best_score (strictly greater).
+    combination_beats_single: bool
+    """Whether any multi-layer combination beats best single layer."""
+    # Computed as best_score - single_layer_best_score.
+    improvement_over_single: float
+    """How much best combination improves over best single layer."""
+    patterns: Dict[str, Any]
+    """Discovered patterns (layer frequency in top combinations, etc.)."""
+    recommendation: str
+    """Final recommendation."""
+def detect_geometry_exhaustive(
+    pos_activations_by_layer: Dict[int, torch.Tensor],
+    neg_activations_by_layer: Dict[int, torch.Tensor],
+    max_layers: int = 16,
+    combination_method: str = "concat",
+    num_components: int = 5,
+    progress_callback: Optional[Callable[[int, int], None]] = None,
+    top_k: int = 100,
+) -> ExhaustiveGeometryAnalysisResult:
+    """
+    Exhaustively test all 2^N - 1 layer combinations for geometric structure.
+    Memory-efficient: combinations are generated lazily and only the top_k
+    best results are kept in memory. Only layers present in both input
+    dictionaries are considered.
+    Arguments:
+        pos_activations_by_layer: Dict mapping layer index to positive activations [N, hidden_dim]
+        neg_activations_by_layer: Dict mapping layer index to negative activations [N, hidden_dim]
+        max_layers: Maximum number of layers to consider (the lowest-indexed layers are kept)
+        combination_method: How to combine layers ("concat", "mean", "weighted")
+        num_components: Number of PCA components for analysis
+        progress_callback: Optional callback(current, total) for progress reporting
+        top_k: Number of top results to keep in memory (default 100, must be >= 1)
+    Returns:
+        ExhaustiveGeometryAnalysisResult with top combinations ranked
+    Raises:
+        ValueError: If top_k is smaller than 1 or no layer is available on both sides.
+    """
+    import heapq
+    from itertools import chain
+    from itertools import combinations as itertools_combinations
+    if top_k < 1:
+        # Previously surfaced as an IndexError on the empty heap.
+        raise ValueError("top_k must be at least 1")
+    layers = sorted(l for l in pos_activations_by_layer if l in neg_activations_by_layer)[:max_layers]
+    n_layers = len(layers)
+    if n_layers == 0:
+        raise ValueError("No layers provided")
+    geo_cfg = GeometryAnalysisConfig(num_components=num_components, optimization_steps=50)
+    # Calculate total without building list (2^n - 1)
+    total_combinations = (1 << n_layers) - 1
+    # Min-heap holding the top_k entries as (score, -sequence_number, result).
+    # The sequence number is a tie-breaker: without it, two equal scores make
+    # heapq compare the (unorderable) result dataclasses and raise TypeError.
+    # Negating it means that among equal scores the LATEST entry sits at the
+    # root, so earlier (smaller) combinations are the ones that are retained.
+    top_results_heap: List[Tuple[float, int, ExhaustiveCombinationResult]] = []
+    single_layer_results: List[ExhaustiveCombinationResult] = []
+    # Lazy stream of all non-empty subsets, ordered by size then lexicographically
+    all_combos = chain.from_iterable(
+        itertools_combinations(layers, r) for r in range(1, n_layers + 1)
+    )
+    # Test each combination
+    for idx, combo in enumerate(all_combos, start=1):
+        if progress_callback:
+            progress_callback(idx, total_combinations)
+        # Combine activations for this subset (single layers are used as-is)
+        if len(combo) == 1:
+            layer = combo[0]
+            combined_pos = pos_activations_by_layer[layer]
+            combined_neg = neg_activations_by_layer[layer]
+        else:
+            combined_pos, combined_neg = _combine_layer_activations(
+                pos_activations_by_layer, neg_activations_by_layer,
+                list(combo), combination_method
+            )
+        # Run geometry detection
+        result = detect_geometry_structure(combined_pos, combined_neg, geo_cfg)
+        all_scores = {name: score.score for name, score in result.all_scores.items()}
+        combo_result = ExhaustiveCombinationResult(
+            layers=combo,
+            best_structure=result.best_structure,
+            best_score=result.best_score,
+            all_scores=all_scores,
+        )
+        # Track single layer results separately (they may fall out of the top_k)
+        if len(combo) == 1:
+            single_layer_results.append(combo_result)
+        # Maintain top_k using heap; only a strictly better score evicts the root
+        entry = (combo_result.best_score, -idx, combo_result)
+        if len(top_results_heap) < top_k:
+            heapq.heappush(top_results_heap, entry)
+        elif combo_result.best_score > top_results_heap[0][0]:
+            heapq.heapreplace(top_results_heap, entry)
+    # Extract top results sorted by score descending, earlier combinations first on ties
+    ranked_entries = sorted(top_results_heap, key=lambda e: (-e[0], -e[1]))
+    all_results = [e[2] for e in ranked_entries]
+    # Extract insights
+    best_result = all_results[0] if all_results else None
+    best_combination = best_result.layers if best_result else ()
+    best_score = best_result.best_score if best_result else 0.0
+    best_structure = best_result.best_structure if best_result else StructureType.UNKNOWN
+    top_10 = all_results[:10]
+    # Find best single layer (first layer wins ties)
+    if single_layer_results:
+        best_single = max(single_layer_results, key=lambda r: r.best_score)
+        single_layer_best = best_single.layers[0]
+        single_layer_best_score = best_single.best_score
+    else:
+        single_layer_best = layers[0]
+        single_layer_best_score = 0.0
+    combination_beats_single = best_score > single_layer_best_score
+    improvement_over_single = best_score - single_layer_best_score
+    # Analyze patterns from top results
+    patterns = _analyze_combination_patterns(all_results, layers, top_k=min(50, len(all_results)))
+    # Generate recommendation
+    recommendation = _generate_exhaustive_recommendation(
+        best_combination, best_score, best_structure,
+        single_layer_best, single_layer_best_score,
+        combination_beats_single, improvement_over_single,
+        patterns, total_combinations
+    )
+    return ExhaustiveGeometryAnalysisResult(
+        total_combinations=total_combinations,
+        all_results=all_results,
+        best_combination=best_combination,
+        best_score=best_score,
+        best_structure=best_structure,
+        top_10=top_10,
+        single_layer_best=single_layer_best,
+        single_layer_best_score=single_layer_best_score,
+        combination_beats_single=combination_beats_single,
+        improvement_over_single=improvement_over_single,
+        patterns=patterns,
+        recommendation=recommendation,
+    )
def detect_geometry_limited(
    pos_activations_by_layer: Dict[int, torch.Tensor],
    neg_activations_by_layer: Dict[int, torch.Tensor],
    max_combo_size: int = 3,
    combination_method: str = "concat",
    num_components: int = 5,
    progress_callback: Optional[Callable[[int, int], None]] = None,
    top_k: int = 100,
) -> ExhaustiveGeometryAnalysisResult:
    """
    Test limited layer combinations: 1-layer, 2-layer, ..., max_combo_size-layer, plus all layers.

    Much cheaper than the exhaustive search while still covering every small combination.

    For N layers with max_combo_size=3:
    - 1-layer: N combinations
    - 2-layer: N*(N-1)/2 combinations
    - 3-layer: N*(N-1)*(N-2)/6 combinations
    - all-layers: 1 combination (only added when max_combo_size < N)
    Total: O(N^3) instead of O(2^N)

    Arguments:
        pos_activations_by_layer: Dict mapping layer index to positive activations [N, hidden_dim]
        neg_activations_by_layer: Dict mapping layer index to negative activations [N, hidden_dim]
        max_combo_size: Maximum combination size to test (1, 2, 3, etc.) before jumping to all.
            Values below 1 leave only the all-layers combination.
        combination_method: How to combine layers ("concat", "mean", "weighted"); forwarded to
            `_combine_layer_activations` for combinations of two or more layers
        num_components: Number of PCA components for analysis
        progress_callback: Optional callback(current, total) for progress reporting
        top_k: Number of top results to keep in memory (default 100)

    Returns:
        ExhaustiveGeometryAnalysisResult whose `all_results` holds at most `top_k`
        combinations, best score first; equal scores keep their evaluation order.

    Raises:
        ValueError: If no layers are provided or `top_k` is smaller than 1.
    """
    import heapq
    from itertools import combinations as itertools_combinations
    from math import comb

    layers = sorted(pos_activations_by_layer.keys())
    n_layers = len(layers)
    if n_layers == 0:
        raise ValueError("No layers provided")
    if top_k < 1:
        # The bounded heap below needs room for at least one entry.
        raise ValueError("top_k must be at least 1")

    geo_cfg = GeometryAnalysisConfig(num_components=num_components, optimization_steps=50)

    largest_size = min(max_combo_size, n_layers)
    include_all_layers = max_combo_size < n_layers

    # Sum of C(n, r) for r = 1..largest_size, plus one for the all-layers combination.
    total_combinations = sum(comb(n_layers, r) for r in range(1, largest_size + 1))
    if include_all_layers:
        total_combinations += 1

    def combo_generator():
        # 1-layer, 2-layer, ..., max_combo_size-layer
        for r in range(1, largest_size + 1):
            yield from itertools_combinations(layers, r)
        # All layers (if not already produced by the loop above)
        if include_all_layers:
            yield tuple(layers)

    # Min-heap of (score, -evaluation_index, result). The unique middle element
    # guarantees that two entries with equal scores never fall through to
    # comparing the result objects themselves, and it makes the most recently
    # evaluated of several tied entries the first one to be evicted.
    top_results_heap: List[Tuple[float, int, ExhaustiveCombinationResult]] = []
    single_layer_results: List[ExhaustiveCombinationResult] = []

    for idx, combo in enumerate(combo_generator(), start=1):
        if progress_callback:
            progress_callback(idx, total_combinations)

        # A single layer is analysed as-is; larger subsets are merged first.
        if len(combo) == 1:
            layer = combo[0]
            combined_pos = pos_activations_by_layer[layer]
            combined_neg = neg_activations_by_layer[layer]
        else:
            combined_pos, combined_neg = _combine_layer_activations(
                pos_activations_by_layer, neg_activations_by_layer,
                list(combo), combination_method
            )

        result = detect_geometry_structure(combined_pos, combined_neg, geo_cfg)
        all_scores = {name: score.score for name, score in result.all_scores.items()}
        combo_result = ExhaustiveCombinationResult(
            layers=combo,
            best_structure=result.best_structure,
            best_score=result.best_score,
            all_scores=all_scores,
        )

        # Single-layer results are tracked separately so the single-layer
        # baseline stays exact even if those entries drop out of the top_k heap.
        if len(combo) == 1:
            single_layer_results.append(combo_result)

        entry = (combo_result.best_score, -idx, combo_result)
        if len(top_results_heap) < top_k:
            heapq.heappush(top_results_heap, entry)
        elif combo_result.best_score > top_results_heap[0][0]:
            heapq.heapreplace(top_results_heap, entry)

    # Score descending; ties in evaluation order (entry[1] is the negated index).
    ranked = sorted(top_results_heap, key=lambda entry: (-entry[0], -entry[1]))
    all_results = [entry[2] for entry in ranked]

    best_result = all_results[0] if all_results else None
    best_combination = best_result.layers if best_result else ()
    best_score = best_result.best_score if best_result else 0.0
    best_structure = best_result.best_structure if best_result else StructureType.UNKNOWN
    top_10 = all_results[:10]

    # Best single layer (falls back to the first layer when none was evaluated).
    if single_layer_results:
        best_single = max(single_layer_results, key=lambda r: r.best_score)
        single_layer_best = best_single.layers[0]
        single_layer_best_score = best_single.best_score
    else:
        single_layer_best = layers[0]
        single_layer_best_score = 0.0

    combination_beats_single = best_score > single_layer_best_score
    improvement_over_single = best_score - single_layer_best_score

    patterns = _analyze_combination_patterns(all_results, layers, top_k=min(50, len(all_results)))
    recommendation = _generate_exhaustive_recommendation(
        best_combination, best_score, best_structure,
        single_layer_best, single_layer_best_score,
        combination_beats_single, improvement_over_single,
        patterns, total_combinations
    )

    return ExhaustiveGeometryAnalysisResult(
        total_combinations=total_combinations,
        all_results=all_results,
        best_combination=best_combination,
        best_score=best_score,
        best_structure=best_structure,
        top_10=top_10,
        single_layer_best=single_layer_best,
        single_layer_best_score=single_layer_best_score,
        combination_beats_single=combination_beats_single,
        improvement_over_single=improvement_over_single,
        patterns=patterns,
        recommendation=recommendation,
    )
def detect_geometry_contiguous(
    pos_activations_by_layer: Dict[int, torch.Tensor],
    neg_activations_by_layer: Dict[int, torch.Tensor],
    combination_method: str = "concat",
    num_components: int = 5,
    progress_callback: Optional[Callable[[int, int], None]] = None,
    top_k: int = 100,
) -> ExhaustiveGeometryAnalysisResult:
    """
    Test contiguous layer combinations only.

    A combination is a run of neighbouring entries in the sorted list of
    provided layer keys (e.g. 1-2, 2-3, 1-3, 5-8 when every layer is present).
    Much faster: O(N^2) combinations instead of O(2^N).

    For N layers: N*(N+1)/2 combinations
    - 36 layers: 666 combinations
    - 24 layers: 300 combinations

    Arguments:
        pos_activations_by_layer: Dict mapping layer index to positive activations [N, hidden_dim]
        neg_activations_by_layer: Dict mapping layer index to negative activations [N, hidden_dim]
        combination_method: How to combine layers ("concat", "mean", "weighted"); forwarded to
            `_combine_layer_activations` for combinations of two or more layers
        num_components: Number of PCA components for analysis
        progress_callback: Optional callback(current, total) for progress reporting
        top_k: Number of top results to keep in memory (default 100)

    Returns:
        ExhaustiveGeometryAnalysisResult whose `all_results` holds at most `top_k`
        combinations, best score first; equal scores keep their evaluation order.

    Raises:
        ValueError: If no layers are provided or `top_k` is smaller than 1.
    """
    import heapq

    layers = sorted(pos_activations_by_layer.keys())
    n_layers = len(layers)
    if n_layers == 0:
        raise ValueError("No layers provided")
    if top_k < 1:
        # The bounded heap below needs room for at least one entry.
        raise ValueError("top_k must be at least 1")

    geo_cfg = GeometryAnalysisConfig(num_components=num_components, optimization_steps=50)

    # Total contiguous combinations: N*(N+1)/2
    total_combinations = n_layers * (n_layers + 1) // 2

    # Every (start, stop) window over the sorted layers, shortest window first
    # for each starting position.
    contiguous_runs = (
        tuple(layers[start:stop])
        for start in range(n_layers)
        for stop in range(start + 1, n_layers + 1)
    )

    # Min-heap of (score, -evaluation_index, result). The unique middle element
    # guarantees that two entries with equal scores never fall through to
    # comparing the result objects themselves, and it makes the most recently
    # evaluated of several tied entries the first one to be evicted.
    top_results_heap: List[Tuple[float, int, ExhaustiveCombinationResult]] = []
    single_layer_results: List[ExhaustiveCombinationResult] = []

    for idx, combo in enumerate(contiguous_runs, start=1):
        if progress_callback:
            progress_callback(idx, total_combinations)

        # A single layer is analysed as-is; larger subsets are merged first.
        if len(combo) == 1:
            layer = combo[0]
            combined_pos = pos_activations_by_layer[layer]
            combined_neg = neg_activations_by_layer[layer]
        else:
            combined_pos, combined_neg = _combine_layer_activations(
                pos_activations_by_layer, neg_activations_by_layer,
                list(combo), combination_method
            )

        result = detect_geometry_structure(combined_pos, combined_neg, geo_cfg)
        all_scores = {name: score.score for name, score in result.all_scores.items()}
        combo_result = ExhaustiveCombinationResult(
            layers=combo,
            best_structure=result.best_structure,
            best_score=result.best_score,
            all_scores=all_scores,
        )

        # Single-layer results are tracked separately so the single-layer
        # baseline stays exact even if those entries drop out of the top_k heap.
        if len(combo) == 1:
            single_layer_results.append(combo_result)

        entry = (combo_result.best_score, -idx, combo_result)
        if len(top_results_heap) < top_k:
            heapq.heappush(top_results_heap, entry)
        elif combo_result.best_score > top_results_heap[0][0]:
            heapq.heapreplace(top_results_heap, entry)

    # Score descending; ties in evaluation order (entry[1] is the negated index).
    ranked = sorted(top_results_heap, key=lambda entry: (-entry[0], -entry[1]))
    all_results = [entry[2] for entry in ranked]

    best_result = all_results[0] if all_results else None
    best_combination = best_result.layers if best_result else ()
    best_score = best_result.best_score if best_result else 0.0
    best_structure = best_result.best_structure if best_result else StructureType.UNKNOWN
    top_10 = all_results[:10]

    # Best single layer (falls back to the first layer when none was evaluated).
    if single_layer_results:
        best_single = max(single_layer_results, key=lambda r: r.best_score)
        single_layer_best = best_single.layers[0]
        single_layer_best_score = best_single.best_score
    else:
        single_layer_best = layers[0]
        single_layer_best_score = 0.0

    combination_beats_single = best_score > single_layer_best_score
    improvement_over_single = best_score - single_layer_best_score

    patterns = _analyze_combination_patterns(all_results, layers, top_k=min(50, len(all_results)))
    recommendation = _generate_exhaustive_recommendation(
        best_combination, best_score, best_structure,
        single_layer_best, single_layer_best_score,
        combination_beats_single, improvement_over_single,
        patterns, total_combinations
    )

    return ExhaustiveGeometryAnalysisResult(
        total_combinations=total_combinations,
        all_results=all_results,
        best_combination=best_combination,
        best_score=best_score,
        best_structure=best_structure,
        top_10=top_10,
        single_layer_best=single_layer_best,
        single_layer_best_score=single_layer_best_score,
        combination_beats_single=combination_beats_single,
        improvement_over_single=improvement_over_single,
        patterns=patterns,
        recommendation=recommendation,
    )
def detect_geometry_smart(
    pos_activations_by_layer: Dict[int, torch.Tensor],
    neg_activations_by_layer: Dict[int, torch.Tensor],
    max_combo_size: int = 3,
    combination_method: str = "concat",
    num_components: int = 5,
    progress_callback: Optional[Callable[[int, int], None]] = None,
    top_k: int = 100,
) -> ExhaustiveGeometryAnalysisResult:
    """
    Smart layer combination search: contiguous + limited (1,2,3-layer) combinations.

    Tests:
    1. All contiguous runs of the sorted provided layers (L1-L2, L1-L3, L5-L10, etc.)
    2. All combinations of 1..max_combo_size layers, contiguous or not
    Combinations produced by both rules are evaluated once.

    For N=36 layers with max_combo_size=3:
    - Contiguous: 666 combinations
    - Limited (1, 2 and 3 layers): 7,806 combinations, 105 of which are also contiguous
    - Total: 8,367 unique combinations

    Arguments:
        pos_activations_by_layer: Dict mapping layer index to positive activations [N, hidden_dim]
        neg_activations_by_layer: Dict mapping layer index to negative activations [N, hidden_dim]
        max_combo_size: Maximum combination size for non-contiguous (default: 3)
        combination_method: How to combine layers ("concat", "mean", "weighted"); forwarded to
            `_combine_layer_activations` for combinations of two or more layers
        num_components: Number of PCA components for analysis
        progress_callback: Optional callback(current, total) for progress reporting
        top_k: Number of top results to keep in memory (default 100)

    Returns:
        ExhaustiveGeometryAnalysisResult whose `all_results` holds at most `top_k`
        combinations, best score first; equal scores keep their evaluation order
        (smaller combinations are evaluated first).

    Raises:
        ValueError: If no layers are provided or `top_k` is smaller than 1.
    """
    import heapq
    from itertools import combinations as itertools_combinations

    layers = sorted(pos_activations_by_layer.keys())
    n_layers = len(layers)
    if n_layers == 0:
        raise ValueError("No layers provided")
    if top_k < 1:
        # The bounded heap below needs room for at least one entry.
        raise ValueError("top_k must be at least 1")

    geo_cfg = GeometryAnalysisConfig(num_components=num_components, optimization_steps=50)

    # A set removes the overlap between the two generation rules.
    all_combos_set: set = set()
    # Contiguous runs of the sorted layers.
    for start_idx in range(n_layers):
        for end_idx in range(start_idx, n_layers):
            all_combos_set.add(tuple(layers[start_idx:end_idx + 1]))
    # Every combination of up to max_combo_size layers.
    for r in range(1, min(max_combo_size, n_layers) + 1):
        all_combos_set.update(itertools_combinations(layers, r))

    # Deterministic evaluation order: by size, then lexicographically.
    all_combos = sorted(all_combos_set, key=lambda combo: (len(combo), combo))
    total_combinations = len(all_combos)

    # Min-heap of (score, -evaluation_index, result). The unique middle element
    # guarantees that two entries with equal scores never fall through to
    # comparing the result objects themselves, and it makes the most recently
    # evaluated of several tied entries the first one to be evicted.
    top_results_heap: List[Tuple[float, int, ExhaustiveCombinationResult]] = []
    single_layer_results: List[ExhaustiveCombinationResult] = []

    for idx, combo in enumerate(all_combos, start=1):
        if progress_callback:
            progress_callback(idx, total_combinations)

        # A single layer is analysed as-is; larger subsets are merged first.
        if len(combo) == 1:
            layer = combo[0]
            combined_pos = pos_activations_by_layer[layer]
            combined_neg = neg_activations_by_layer[layer]
        else:
            combined_pos, combined_neg = _combine_layer_activations(
                pos_activations_by_layer, neg_activations_by_layer,
                list(combo), combination_method
            )

        result = detect_geometry_structure(combined_pos, combined_neg, geo_cfg)
        all_scores = {name: score.score for name, score in result.all_scores.items()}
        combo_result = ExhaustiveCombinationResult(
            layers=combo,
            best_structure=result.best_structure,
            best_score=result.best_score,
            all_scores=all_scores,
        )

        # Single-layer results are tracked separately so the single-layer
        # baseline stays exact even if those entries drop out of the top_k heap.
        if len(combo) == 1:
            single_layer_results.append(combo_result)

        entry = (combo_result.best_score, -idx, combo_result)
        if len(top_results_heap) < top_k:
            heapq.heappush(top_results_heap, entry)
        elif combo_result.best_score > top_results_heap[0][0]:
            heapq.heapreplace(top_results_heap, entry)

    # Score descending; ties in evaluation order (entry[1] is the negated index).
    ranked = sorted(top_results_heap, key=lambda entry: (-entry[0], -entry[1]))
    all_results = [entry[2] for entry in ranked]

    best_result = all_results[0] if all_results else None
    best_combination = best_result.layers if best_result else ()
    best_score = best_result.best_score if best_result else 0.0
    best_structure = best_result.best_structure if best_result else StructureType.UNKNOWN
    top_10 = all_results[:10]

    # Best single layer (falls back to the first layer when none was evaluated).
    if single_layer_results:
        best_single = max(single_layer_results, key=lambda r: r.best_score)
        single_layer_best = best_single.layers[0]
        single_layer_best_score = best_single.best_score
    else:
        single_layer_best = layers[0]
        single_layer_best_score = 0.0

    combination_beats_single = best_score > single_layer_best_score
    improvement_over_single = best_score - single_layer_best_score

    patterns = _analyze_combination_patterns(all_results, layers, top_k=min(50, len(all_results)))
    recommendation = _generate_exhaustive_recommendation(
        best_combination, best_score, best_structure,
        single_layer_best, single_layer_best_score,
        combination_beats_single, improvement_over_single,
        patterns, total_combinations
    )

    return ExhaustiveGeometryAnalysisResult(
        total_combinations=total_combinations,
        all_results=all_results,
        best_combination=best_combination,
        best_score=best_score,
        best_structure=best_structure,
        top_10=top_10,
        single_layer_best=single_layer_best,
        single_layer_best_score=single_layer_best_score,
        combination_beats_single=combination_beats_single,
        improvement_over_single=improvement_over_single,
        patterns=patterns,
        recommendation=recommendation,
    )
def _analyze_combination_patterns(
    all_results: List[ExhaustiveCombinationResult],
    layers: List[int],
    top_k: int = 50,
) -> Dict[str, Any]:
    """Summarise recurring patterns among the highest-ranked combinations.

    Arguments:
        all_results: Combination results; the first `top_k` entries are treated
            as the "top" group (callers pass them sorted best-first).
        layers: Layer indices under analysis; the element at the middle
            position is the early/late boundary.
        top_k: Size of the leading slice used for the frequency statistics.

    Returns:
        Dict with layer/size/structure frequencies for the top group, the best
        score per combination size (taken over all of `all_results`), the number
        of top combinations containing two layers whose indices differ by one,
        and the early-to-late layer ratio.
    """
    from collections import Counter

    leading = all_results[:top_k]

    # How often each layer, each combination size and each structure shows up
    # in the leading slice.
    layer_counts = Counter(layer for res in leading for layer in res.layers)
    size_counts = Counter(len(res.layers) for res in leading)
    structure_counts = Counter(res.best_structure for res in leading)

    # Highest score seen for every combination size, across all results.
    best_by_size: Dict[int, float] = {}
    for res in all_results:
        n_members = len(res.layers)
        if n_members in best_by_size and not (res.best_score > best_by_size[n_members]):
            continue
        best_by_size[n_members] = res.best_score

    # Leading combinations that contain at least one pair of layers whose
    # indices differ by exactly one.
    combos_with_neighbours = 0
    for res in leading:
        ordered = sorted(res.layers)
        if any(nxt - cur == 1 for cur, nxt in zip(ordered, ordered[1:])):
            combos_with_neighbours += 1

    # Split layer occurrences around the layer at the middle position.
    boundary = layers[len(layers) // 2] if layers else 0
    early_hits = 0
    late_hits = 0
    for res in leading:
        for layer in res.layers:
            if layer < boundary:
                early_hits += 1
            if layer >= boundary:
                late_hits += 1

    if best_by_size:
        optimal_size = max(best_by_size, key=best_by_size.get)
    else:
        optimal_size = 1

    if structure_counts:
        dominant = structure_counts.most_common(1)[0][0].value
    else:
        dominant = "unknown"

    if late_hits > 0:
        ratio = early_hits / late_hits
    else:
        ratio = float('inf')

    return {
        "layer_frequency_in_top": dict(layer_counts.most_common()),
        "most_important_layers": [layer for layer, _ in layer_counts.most_common(5)],
        "size_distribution_in_top": dict(size_counts),
        "best_score_by_size": best_by_size,
        "optimal_combination_size": optimal_size,
        "structure_frequency_in_top": {
            structure.value: count for structure, count in structure_counts.most_common()
        },
        "dominant_structure": dominant,
        "adjacent_pairs_in_top": combos_with_neighbours,
        "early_vs_late_ratio": ratio,
    }
+def _generate_exhaustive_recommendation(
+    best_combination: Tuple[int, ...],
+    best_score: float,
+    best_structure: StructureType,
+    single_layer_best: int,
+    single_layer_best_score: float,
+    combination_beats_single: bool,
+    improvement_over_single: float,
+    patterns: Dict[str, Any],
+    total_combinations: int,
+) -> str:
+    """Generate recommendation from exhaustive analysis."""
+    parts = []
+    parts.append(f"Tested {total_combinations} layer combinations.")
+    if combination_beats_single and improvement_over_single > 0.05:
+        layers_str = "+".join(f"L{l}" for l in best_combination)
+        parts.append(
+            f"BEST: {layers_str} ({best_structure.value}: {best_score:.3f}), "
+            f"+{improvement_over_single:.3f} over single layer L{single_layer_best}."
+        )
+    else:
+        parts.append(
+            f"BEST: Single layer L{single_layer_best} ({best_score:.3f}). "
+            f"Multi-layer combinations don't significantly improve."
+        )
+    # Pattern insights
+    opt_size = patterns.get("optimal_combination_size", 1)
+    if opt_size > 1:
+        parts.append(f"Optimal combination size: {opt_size} layers.")
+    important_layers = patterns.get("most_important_layers", [])
+    if important_layers:
+        layers_str = ", ".join(f"L{l}" for l in important_layers[:3])
+        parts.append(f"Most important layers: {layers_str}.")
+    dominant = patterns.get("dominant_structure", "unknown")
+    parts.append(f"Dominant structure: {dominant}.")
+    return " ".join(parts)

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

wisent 0.7.379py3-none-any.whl → 0.7.901py3-none-any.whl