PyPI - wisent - Versions diffs - 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl - Mend

wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (725) hide show

wisent/core/parser_arguments/tasks_parser.py CHANGED Viewed

@@ -155,12 +155,14 @@ def setup_tasks_parser(parser):
     parser.add_argument("--device", type=str, default=None, help="Device to run on")
     parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility")
     parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+    # Extraction strategy - unified approach combining prompt format and token selection
+    from wisent.core.activations.extraction_strategy import ExtractionStrategy
     parser.add_argument(
-        "--token-aggregation",
+        "--extraction-strategy",
         type=str,
-        choices=["average", "final", "first", "max", "min", "max_score"],
-        default="average",
-        help="How to aggregate token scores for classification. 'max_score' uses the highest individual token hallucination score.",
+        choices=ExtractionStrategy.list_all(),
+        default=ExtractionStrategy.default().value,
+        help=f"Extraction strategy for activations. Options: {', '.join(ExtractionStrategy.list_all())}. Default: {ExtractionStrategy.default().value}",
     )
     parser.add_argument(
         "--ground-truth-method",
@@ -385,21 +387,7 @@ def setup_tasks_parser(parser):
         help="Directory for saving/loading classifiers and vectors (default: ./models)",
     )
-    # Prompt construction and token targeting strategy arguments
-    parser.add_argument(
-        "--prompt-construction-strategy",
-        type=str,
-        choices=["multiple_choice", "role_playing", "direct_completion", "instruction_following", "chat_template"],
-        default="chat_template",
-        help="Strategy for constructing prompts from question-answer pairs (default: chat_template)",
-    )
-    parser.add_argument(
-        "--token-targeting-strategy",
-        type=str,
-        choices=["choice_token", "continuation_token", "last_token", "first_token", "mean_pooling", "max_pooling"],
-        default="choice_token",
-        help="Strategy for targeting tokens during activation extraction (default: choice_token)",
-    )
     # Normalization options
     parser.add_argument("--normalize-mode", action="store_true", help="Enable normalization mode (legacy flag)")

wisent/core/steering_methods/core/atoms.py CHANGED Viewed

@@ -150,5 +150,4 @@ class PerLayerBaseSteeringMethod(BaseSteeringMethod):
             raw[layer] = self.train_for_layer(pos_list, neg_list)
         dtype = self.kwargs.get("dtype", None)
-        agg = self.kwargs.get("activation_aggregation_strategy", None)
-        return LayerActivations(raw, activation_aggregation_strategy=agg, dtype=dtype)
+        return LayerActivations(raw, dtype=dtype)

wisent/core/steering_methods/methods/caa.py CHANGED Viewed

@@ -14,7 +14,7 @@ class CAAMethod(PerLayerBaseSteeringMethod):
     """
     Contrastive Activation Additions (CAA).
     For each layer: v = mean(positives) - mean(negatives),
-    optionally L2-normalized (kwargs: normalize=True, dtype=..., activation_aggregation_strategy=...).
+    optionally L2-normalized (kwargs: normalize=True, dtype=...).
     """
     name = "caa"
     description = "Per-layer mean(pos)-mean(neg) over ContrastivePairSet."

wisent/core/steering_methods/methods/hyperplane.py ADDED Viewed

@@ -0,0 +1,74 @@
+from __future__ import annotations
+from typing import List
+import torch
+import numpy as np
+from wisent.core.steering_methods.core.atoms import PerLayerBaseSteeringMethod
+from wisent.core.errors import InsufficientDataError
+__all__ = [
+    "HyperplaneMethod",
+]
+class HyperplaneMethod(PerLayerBaseSteeringMethod):
+    """
+    Hyperplane-based steering using classifier decision boundary.
+    Instead of computing mean(pos) - mean(neg) like CAA, this method trains
+    a logistic regression classifier to separate positive from negative activations,
+    then uses the classifier's weight vector (hyperplane normal) as the steering vector.
+    This works better when the geometry is orthogonal (each contrastive pair has
+    a unique direction) rather than linear (all pairs share a common direction).
+    In orthogonal geometry, CAA's mean difference cancels out to near-zero,
+    while the classifier can still find a separating hyperplane.
+    """
+    name = "hyperplane"
+    description = "Classifier-based steering using logistic regression decision boundary as steering vector."
+    def train_for_layer(self, pos_list: List[torch.Tensor], neg_list: List[torch.Tensor]) -> torch.Tensor:
+        """
+        Train hyperplane steering vector for a single layer using logistic regression.
+        arguments:
+            pos_list: List of positive activations (torch.Tensor) for this layer.
+            neg_list: List of negative activations (torch.Tensor) for this layer.
+        returns:
+            torch.Tensor steering vector for the layer (classifier weights / hyperplane normal).
+        """
+        if not pos_list or not neg_list:
+            raise InsufficientDataError(reason="Both positive and negative lists must be non-empty.")
+        pos = torch.stack([t.detach().to("cpu").float().reshape(-1) for t in pos_list], dim=0)
+        neg = torch.stack([t.detach().to("cpu").float().reshape(-1) for t in neg_list], dim=0)
+        pos_np = pos.numpy()
+        neg_np = neg.numpy()
+        X = np.vstack([pos_np, neg_np])
+        y = np.array([1] * len(pos_np) + [0] * len(neg_np))
+        # Train logistic regression classifier
+        from sklearn.linear_model import LogisticRegression
+        max_iter = int(self.kwargs.get("max_iter", 1000))
+        C = float(self.kwargs.get("C", 1.0))
+        clf = LogisticRegression(max_iter=max_iter, C=C, solver="lbfgs")
+        clf.fit(X, y)
+        # Use classifier weights as steering vector
+        v = torch.tensor(clf.coef_[0], dtype=torch.float32)
+        if bool(self.kwargs.get("normalize", True)):
+            v = self._safe_l2_normalize(v)
+        return v
+    def _safe_l2_normalize(self, v: torch.Tensor, eps: float = 1e-12) -> torch.Tensor:
+        if v.ndim != 1:
+            v = v.reshape(-1)
+        return v / (torch.linalg.norm(v) + eps)

wisent/core/steering_methods/methods/prism.py CHANGED Viewed

@@ -219,8 +219,7 @@ class PRISMMethod(BaseSteeringMethod):
         primary_map: RawActivationMap = multi_result.to_single_direction_map()
         dtype = self.kwargs.get("dtype", None)
-        agg = self.kwargs.get("activation_aggregation_strategy", None)
-        return LayerActivations(primary_map, activation_aggregation_strategy=agg, dtype=dtype)
+        return LayerActivations(primary_map, dtype=dtype)
     def train_multi(self, pair_set: ContrastivePairSet) -> MultiDirectionResult:
         """

wisent/core/steering_methods/methods/pulse.py CHANGED Viewed

@@ -41,15 +41,30 @@ class PULSEConfig:
     """Configuration for PULSE steering method."""
     # Layer configuration
-    sensor_layer: int = 15
-    """Layer index where condition gating is computed."""
+    sensor_layer: Optional[int] = None
+    """Layer index where condition gating is computed. If None, auto-computed from num_layers."""
-    steering_layers: List[int] = field(default_factory=lambda: [12, 13, 14, 15, 16, 17, 18])
-    """Layer indices where steering is applied."""
+    steering_layers: Optional[List[int]] = None
+    """Layer indices where steering is applied. If None, auto-computed from num_layers."""
+    num_layers: Optional[int] = None
+    """Total layers in the model. Used to auto-compute steering_layers and sensor_layer."""
     per_layer_scaling: bool = True
     """Whether to learn/use different scaling per layer."""
+    def resolve_layers(self, num_layers: int) -> None:
+        """Resolve steering_layers and sensor_layer based on model's num_layers."""
+        self.num_layers = num_layers
+        if self.sensor_layer is None:
+            # 75% through the network
+            self.sensor_layer = int(num_layers * 0.75)
+        if self.steering_layers is None:
+            # Middle to late layers (50% to 85% of network)
+            start = int(num_layers * 0.5)
+            end = int(num_layers * 0.85)
+            self.steering_layers = list(range(start, end))
     # Condition gating
     condition_threshold: float = 0.5
     """Threshold for condition activation (0-1)."""
@@ -188,9 +203,12 @@ class PULSEMethod(BaseSteeringMethod):
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
+        # steering_layers and sensor_layer default to None - resolved at training time
+        # based on actual num_layers in the model
         self.config = PULSEConfig(
-            sensor_layer=kwargs.get("sensor_layer", 15),
-            steering_layers=kwargs.get("steering_layers", [12, 13, 14, 15, 16, 17, 18]),
+            sensor_layer=kwargs.get("sensor_layer", None),  # Auto-resolve from num_layers
+            steering_layers=kwargs.get("steering_layers", None),  # Auto-resolve from num_layers
+            num_layers=kwargs.get("num_layers", None),
             per_layer_scaling=kwargs.get("per_layer_scaling", True),
             condition_threshold=kwargs.get("condition_threshold", 0.5),
             gate_temperature=kwargs.get("gate_temperature", 0.1),
@@ -224,8 +242,7 @@ class PULSEMethod(BaseSteeringMethod):
         # Return behavior vectors as LayerActivations
         dtype = self.kwargs.get("dtype", None)
-        agg = self.kwargs.get("activation_aggregation_strategy", None)
-        return LayerActivations(result.behavior_vectors, activation_aggregation_strategy=agg, dtype=dtype)
+        return LayerActivations(result.behavior_vectors, dtype=dtype)
     def train_pulse(
         self,
@@ -246,6 +263,20 @@ class PULSEMethod(BaseSteeringMethod):
         if condition_pairs is None:
             condition_pairs = behavior_pairs
+        # Detect num_layers from available data and resolve config
+        buckets = self._collect_from_set(behavior_pairs)
+        if buckets:
+            max_layer_idx = 0
+            for layer_name in buckets.keys():
+                try:
+                    layer_idx = int(str(layer_name).split("_")[-1])
+                    max_layer_idx = max(max_layer_idx, layer_idx)
+                except (ValueError, IndexError):
+                    pass
+            detected_num_layers = max_layer_idx + 1
+            if self.config.steering_layers is None or self.config.sensor_layer is None:
+                self.config.resolve_layers(detected_num_layers)
         # 1. Train behavior vectors for steering layers
         behavior_vectors = self._train_behavior_vectors(behavior_pairs)

wisent/core/steering_methods/methods/titan.py CHANGED Viewed

@@ -52,18 +52,42 @@ class TITANConfig:
     """Number of directions per layer in the steering manifold."""
     # Layer configuration
-    steering_layers: List[int] = field(default_factory=lambda: [10, 11, 12, 13, 14, 15, 16, 17, 18])
-    """Layer indices where steering can be applied."""
+    steering_layers: Optional[List[int]] = None
+    """Layer indices where steering can be applied. If None, auto-computed from num_layers."""
-    sensor_layer: int = 15
-    """Primary layer for gating decisions."""
+    sensor_layer: Optional[int] = None
+    """Primary layer for gating decisions. If None, auto-computed from num_layers."""
+    num_layers: Optional[int] = None
+    """Total layers in the model. Used to auto-compute steering_layers and sensor_layer."""
+    def resolve_layers(self, num_layers: int) -> None:
+        """Resolve steering_layers and sensor_layer based on model's num_layers."""
+        self.num_layers = num_layers
+        if self.sensor_layer is None:
+            # 75% through the network
+            self.sensor_layer = int(num_layers * 0.75)
+        if self.steering_layers is None:
+            # Middle to late layers (50% to 90% of network)
+            start = int(num_layers * 0.5)
+            end = int(num_layers * 0.9)
+            self.steering_layers = list(range(start, end))
     # Network architecture
-    gate_hidden_dim: int = 128
-    """Hidden dimension for gating network."""
+    gate_hidden_dim: Optional[int] = None
+    """Hidden dimension for gating network. If None, auto-computed as hidden_dim // 16."""
-    intensity_hidden_dim: int = 64
-    """Hidden dimension for intensity network."""
+    intensity_hidden_dim: Optional[int] = None
+    """Hidden dimension for intensity network. If None, auto-computed as hidden_dim // 32."""
+    def resolve_network_dims(self, hidden_dim: int) -> None:
+        """Resolve network dimensions based on model's hidden dimension."""
+        if self.gate_hidden_dim is None:
+            # Scale with model size, but clamp to reasonable range [32, 512]
+            self.gate_hidden_dim = max(32, min(512, hidden_dim // 16))
+        if self.intensity_hidden_dim is None:
+            # Scale with model size, but clamp to reasonable range [16, 256]
+            self.intensity_hidden_dim = max(16, min(256, hidden_dim // 32))
     # Training
     optimization_steps: int = 200
@@ -392,12 +416,15 @@ class TITANMethod(BaseSteeringMethod):
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
+        # steering_layers and sensor_layer default to None - resolved at training time
+        # based on actual num_layers in the model
         self.config = TITANConfig(
             num_directions=kwargs.get("num_directions", 5),
-            steering_layers=kwargs.get("steering_layers", [10, 11, 12, 13, 14, 15, 16, 17, 18]),
-            sensor_layer=kwargs.get("sensor_layer", 15),
-            gate_hidden_dim=kwargs.get("gate_hidden_dim", 128),
-            intensity_hidden_dim=kwargs.get("intensity_hidden_dim", 64),
+            steering_layers=kwargs.get("steering_layers", None),  # Auto-resolve from num_layers
+            sensor_layer=kwargs.get("sensor_layer", None),  # Auto-resolve from num_layers
+            num_layers=kwargs.get("num_layers", None),
+            gate_hidden_dim=kwargs.get("gate_hidden_dim", None),  # Auto-resolve from hidden_dim
+            intensity_hidden_dim=kwargs.get("intensity_hidden_dim", None),  # Auto-resolve from hidden_dim
             optimization_steps=kwargs.get("optimization_steps", 200),
             learning_rate=kwargs.get("learning_rate", 0.005),
             warmup_steps=kwargs.get("warmup_steps", 20),
@@ -429,8 +456,7 @@ class TITANMethod(BaseSteeringMethod):
             primary_map[layer] = result.get_effective_direction(layer)
         dtype = self.kwargs.get("dtype", None)
-        agg = self.kwargs.get("activation_aggregation_strategy", None)
-        return LayerActivations(primary_map, activation_aggregation_strategy=agg, dtype=dtype)
+        return LayerActivations(primary_map, dtype=dtype)
     def train_titan(self, pair_set: ContrastivePairSet) -> TITANResult:
         """
@@ -448,6 +474,21 @@ class TITANMethod(BaseSteeringMethod):
         if not buckets:
             raise InsufficientDataError(reason="No valid activation pairs found")
+        # Detect num_layers from available data if not set
+        # Find max layer index to determine model size
+        max_layer_idx = 0
+        for layer_name in buckets.keys():
+            try:
+                layer_idx = int(str(layer_name).split("_")[-1])
+                max_layer_idx = max(max_layer_idx, layer_idx)
+            except (ValueError, IndexError):
+                pass
+        # Resolve steering_layers and sensor_layer based on detected num_layers
+        detected_num_layers = max_layer_idx + 1  # layers are 0-indexed
+        if self.config.steering_layers is None or self.config.sensor_layer is None:
+            self.config.resolve_layers(detected_num_layers)
         # Filter to steering layers and determine hidden dim
         layer_names = []
         hidden_dim = None
@@ -472,6 +513,10 @@ class TITANMethod(BaseSteeringMethod):
         if not layer_names or hidden_dim is None:
             raise InsufficientDataError(reason="No valid steering layers found")
+        # Resolve network dimensions based on actual hidden_dim
+        if self.config.gate_hidden_dim is None or self.config.intensity_hidden_dim is None:
+            self.config.resolve_network_dims(hidden_dim)
         num_layers = len(layer_names)
         # Geometry analysis and adaptation

wisent/core/steering_methods/registry.py CHANGED Viewed

@@ -75,6 +75,7 @@ from wisent.core.steering_methods.core.atoms import BaseSteeringMethod
 class SteeringMethodType(Enum):
     """Enumeration of all supported steering methods."""
     CAA = "caa"
+    HYPERPLANE = "hyperplane"
     PRISM = "prism"
     PULSE = "pulse"
     TITAN = "titan"
@@ -190,6 +191,44 @@ CAA_DEFINITION = SteeringMethodDefinition(
 )
+HYPERPLANE_DEFINITION = SteeringMethodDefinition(
+    name="hyperplane",
+    method_type=SteeringMethodType.HYPERPLANE,
+    description="Classifier-based steering using logistic regression decision boundary. Works better than CAA when geometry is orthogonal (each pair has unique direction rather than shared direction).",
+    method_class_path="wisent.core.steering_methods.methods.hyperplane.HyperplaneMethod",
+    parameters=[
+        SteeringMethodParameter(
+            name="normalize",
+            type=bool,
+            default=True,
+            help="L2-normalize the steering vector",
+            action="store_true",
+            cli_flag="--hyperplane-normalize",
+        ),
+        SteeringMethodParameter(
+            name="max_iter",
+            type=int,
+            default=1000,
+            help="Maximum iterations for logistic regression",
+            cli_flag="--hyperplane-max-iter",
+        ),
+        SteeringMethodParameter(
+            name="C",
+            type=float,
+            default=1.0,
+            help="Regularization strength (inverse). Smaller values = stronger regularization.",
+            cli_flag="--hyperplane-C",
+        ),
+    ],
+    optimization_config={
+        "strength_search_range": (0.1, 5.0),
+        "default_strength": 1.0,
+    },
+    default_strength=1.0,
+    strength_range=(0.1, 5.0),
+)
 PRISM_DEFINITION = SteeringMethodDefinition(
     name="prism",
     method_type=SteeringMethodType.PRISM,
@@ -289,15 +328,15 @@ PULSE_DEFINITION = SteeringMethodDefinition(
         SteeringMethodParameter(
             name="sensor_layer",
             type=int,
-            default=15,
-            help="Layer index where condition gating is computed",
+            default=None,
+            help="Layer index where condition gating is computed (auto-computed if not set)",
             cli_flag="--pulse-sensor-layer",
         ),
         SteeringMethodParameter(
             name="steering_layers",
             type=str,
-            default="12,13,14,15,16,17,18",
-            help="Comma-separated layer indices where steering is applied",
+            default=None,
+            help="Comma-separated layer indices where steering is applied (auto-computed if not set)",
             cli_flag="--pulse-steering-layers",
         ),
         SteeringMethodParameter(
@@ -408,29 +447,29 @@ TITAN_DEFINITION = SteeringMethodDefinition(
         SteeringMethodParameter(
             name="steering_layers",
             type=str,
-            default="10,11,12,13,14,15,16,17,18",
-            help="Comma-separated layer indices for steering",
+            default=None,
+            help="Comma-separated layer indices for steering (auto-computed if not set)",
             cli_flag="--titan-steering-layers",
         ),
         SteeringMethodParameter(
             name="sensor_layer",
             type=int,
-            default=15,
-            help="Primary layer for gating decisions",
+            default=None,
+            help="Primary layer for gating decisions (auto-computed if not set)",
             cli_flag="--titan-sensor-layer",
         ),
         SteeringMethodParameter(
             name="gate_hidden_dim",
             type=int,
-            default=128,
-            help="Hidden dimension for gating network",
+            default=None,
+            help="Hidden dimension for gating network (auto-computed as hidden_dim//16 if not set)",
             cli_flag="--titan-gate-hidden-dim",
         ),
         SteeringMethodParameter(
             name="intensity_hidden_dim",
             type=int,
-            default=64,
-            help="Hidden dimension for intensity network",
+            default=None,
+            help="Hidden dimension for intensity network (auto-computed as hidden_dim//32 if not set)",
             cli_flag="--titan-intensity-hidden-dim",
         ),
         SteeringMethodParameter(
@@ -518,6 +557,7 @@ class SteeringMethodRegistry:
     _REGISTRY: Dict[str, SteeringMethodDefinition] = {
         "caa": CAA_DEFINITION,
+        "hyperplane": HYPERPLANE_DEFINITION,
         "prism": PRISM_DEFINITION,
         "pulse": PULSE_DEFINITION,
         "titan": TITAN_DEFINITION,

wisent/core/steering_optimizer.py CHANGED Viewed

@@ -26,8 +26,8 @@ from enum import Enum, auto
 from pathlib import Path
 from .config_manager import ModelConfigManager
-from .activations.core.atoms import ActivationAggregationStrategy
-from .activations.prompt_construction_strategy import PromptConstructionStrategy
+from .activations.extraction_strategy import ExtractionStrategy
 from wisent.core.errors import (
     MissingParameterError,
     SteeringMethodUnknownError,
@@ -60,22 +60,22 @@ class SteeringApplicationConfig:
     gaussian_width: float = 0.2
-def get_default_token_aggregation_strategies() -> List[ActivationAggregationStrategy]:
+def get_default_token_aggregation_strategies() -> List[ExtractionStrategy]:
     """Get token aggregation strategies to test."""
     return [
-        ActivationAggregationStrategy.LAST_TOKEN,
-        ActivationAggregationStrategy.MEAN_POOLING,
-        ActivationAggregationStrategy.FIRST_TOKEN,
-        ActivationAggregationStrategy.MAX_POOLING,
+        ExtractionStrategy.CHAT_LAST,
+        ExtractionStrategy.CHAT_MEAN,
+        ExtractionStrategy.CHAT_FIRST,
+        ExtractionStrategy.CHAT_MAX_NORM,
     ]
-def get_default_prompt_construction_strategies() -> List[PromptConstructionStrategy]:
+def get_default_prompt_construction_strategies() -> List[ExtractionStrategy]:
     """Get prompt construction strategies to test."""
     return [
-        PromptConstructionStrategy.CHAT_TEMPLATE,
-        PromptConstructionStrategy.DIRECT_COMPLETION,
-        PromptConstructionStrategy.INSTRUCTION_FOLLOWING,
+        ExtractionStrategy.CHAT_LAST,
+        ExtractionStrategy.CHAT_LAST,
+        ExtractionStrategy.CHAT_LAST,
     ]
@@ -399,8 +399,8 @@ class SteeringOptimizer:
         methods_to_test: Optional[List[SteeringMethod]] = None,
         layer_range: Optional[str] = None,
         strength_range: Optional[List[float]] = None,
-        token_aggregation_strategies: Optional[List[ActivationAggregationStrategy]] = None,
-        prompt_construction_strategies: Optional[List[PromptConstructionStrategy]] = None,
+        token_aggregation_strategies: Optional[List[ExtractionStrategy]] = None,
+        prompt_construction_strategies: Optional[List[ExtractionStrategy]] = None,
         steering_application_configs: Optional[List[SteeringApplicationConfig]] = None,
         limit: int = 100,
         max_time_minutes: float = 60.0,
@@ -603,8 +603,8 @@ class SteeringOptimizer:
         method: SteeringMethod,
         layer: int,
         strength: float,
-        token_aggregation: ActivationAggregationStrategy,
-        prompt_construction: PromptConstructionStrategy,
+        token_aggregation: ExtractionStrategy,
+        prompt_construction: ExtractionStrategy,
         steering_application: SteeringApplicationConfig,
         limit: int,
         split_ratio: float

wisent/core/trainers/steering_trainer.py CHANGED Viewed

@@ -10,9 +10,9 @@ import datetime as _dt
 from wisent.core.activations.core.atoms import (
     LayerActivations,
-    ActivationAggregationStrategy,
     RawActivationMap,
 )
+from wisent.core.activations.extraction_strategy import ExtractionStrategy
 from wisent.core.models.wisent_model import WisentModel
 from wisent.core.trainers.core.atoms import (
@@ -66,8 +66,7 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
         self,
         layers_spec: Sequence[str] | str | int | Sequence[int] | None,
         method_kwargs: dict[str, Any] | None = None,
-        aggregation: ActivationAggregationStrategy = ActivationAggregationStrategy.CONTINUATION_TOKEN,
-        return_full_sequence: bool = False,
+        strategy: ExtractionStrategy = ExtractionStrategy.CHAT_LAST,
         normalize_layers: bool = False,
         save_dir: str | Path | None = None,
         accept_low_quality_vector: bool = False,
@@ -87,16 +86,10 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
                 - range string "10-30" / "10..30"
                 - single int "12"
                 - None → use all available layers on the model
-            method:
-                Name of steering method ("caa").
             method_kwargs:
                 Dict of hyperparameters for the method (e.g., {"normalize": True, "scale": 1.0}).
-            aggregation:
-                ActivationAggregationStrategy to use during collection when not returning
-                full sequences. Ignored if 'return_full_sequence=True'.
-            return_full_sequence:
-                If True, store full [T,H] sequences per layer (method then must know how
-                to collapse to vectors). Default False (collect [H] vectors directly).
+            strategy:
+                ExtractionStrategy to use during collection.
             normalize_layers:
                 If True, L2-normalize activations layer-wise during collection.
             save_dir:
@@ -112,12 +105,11 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
         # 2) Collect activations for each pair
         for i, pair in enumerate(self.pair_set.pairs):
-            updated = self.collector.collect_for_pair(
+            updated = self.collector.collect(
                 pair,
+                strategy=strategy,
                 layers=layers,
-                aggregation=aggregation,
-                return_full_sequence=return_full_sequence,
-                normalize_layers=normalize_layers,
+                normalize=normalize_layers,
             )
             self.pair_set.pairs[i] = updated
@@ -221,8 +213,7 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
             "layers_used": layers or "all",
             "method": self.steering_method.name,
             "method_kwargs": method_kwargs,
-            "activation_aggregation_strategy": (None if return_full_sequence else aggregation),
-            "return_full_sequence": bool(return_full_sequence),
+            "extraction_strategy": strategy.value,
             "normalize_layers": bool(normalize_layers),
             "num_pairs": len(self.pair_set.pairs),
             "hidden_size": getattr(self.model, "hidden_size", None),
@@ -290,7 +281,7 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
         # Vectors
         raw_map: RawActivationMap = result.steered_vectors.to_dict()  # still tensors
-        cpu_map = {k: (v.detach().to("cpu") if isinstance(v, torch.Tensor) else v) for k, v in raw_map.items() if k != "_activation_aggregation_strategy"}
+        cpu_map = {k: (v.detach().to("cpu") if isinstance(v, torch.Tensor) else v) for k, v in raw_map.items()}
         torch.save(cpu_map, out / "steering_vectors.pt")
         # Summary (json-serializable)

wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl

wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl