PyPI - wisent - Versions diffs - 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl - Mend

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1020) hide show

wisent/core/geometry_search_space.py ADDED Viewed

@@ -0,0 +1,237 @@
+"""
+Configuration for geometry search space.
+Defines all parameters to search over when testing if a unified "goodness"
+direction exists across benchmarks.
+Strategy:
+- Extract activations for ALL layers once per (benchmark, strategy) pair
+- Cache activations to disk/memory
+- Test all layer combinations from cached activations (fast, just tensor math)
+- This reduces extraction time from O(layer_combos) to O(1) per benchmark
+"""
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict, Any
+from enum import Enum
+from pathlib import Path
+import json
+from wisent.core.activations.extraction_strategy import ExtractionStrategy
+from wisent.core.utils.layer_combinations import get_layer_combinations
+from wisent.core.benchmark_registry import get_all_benchmarks
+from wisent.core.activations.activation_cache import ActivationCache, CachedActivations
+@dataclass
+class GeometrySearchConfig:
+    """Configuration for a single geometry search run."""
+    # Pairs settings
+    pairs_per_benchmark: int = 50
+    random_seed: int = 42
+    # Layer settings
+    max_layer_combo_size: int = 3
+    # Caching
+    cache_activations: bool = True
+    cache_dir: Optional[str] = None
+    # Estimation
+    estimated_time_per_extraction_seconds: float = 120.0  # ~2 min per (benchmark, strategy)
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "pairs_per_benchmark": self.pairs_per_benchmark,
+            "random_seed": self.random_seed,
+            "max_layer_combo_size": self.max_layer_combo_size,
+            "cache_activations": self.cache_activations,
+            "cache_dir": self.cache_dir,
+            "estimated_time_per_extraction_seconds": self.estimated_time_per_extraction_seconds,
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GeometrySearchConfig":
+        return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__})
+class GeometrySearchSpace:
+    """
+    Search space configuration for geometry testing.
+    Combines:
+    - Models to test
+    - Extraction strategies
+    - Layer combinations
+    - Benchmarks
+    With activation caching:
+    - Extract ALL layers once per (benchmark, strategy)
+    - Test layer combinations from cache (no re-extraction needed)
+    """
+    # Default models to test
+    DEFAULT_MODELS = [
+        "meta-llama/Llama-3.2-1B-Instruct",
+        "meta-llama/Llama-2-7b-chat-hf",
+        "Qwen/Qwen3-8B",
+        "openai/gpt-oss-20b",
+    ]
+    # Extraction strategies for instruct models
+    INSTRUCT_STRATEGIES = [
+        ExtractionStrategy.CHAT_MEAN,
+        ExtractionStrategy.CHAT_FIRST,
+        ExtractionStrategy.CHAT_LAST,
+        ExtractionStrategy.CHAT_MAX_NORM,
+        ExtractionStrategy.CHAT_WEIGHTED,
+        ExtractionStrategy.ROLE_PLAY,
+        ExtractionStrategy.MC_BALANCED,
+    ]
+    # Extraction strategies for base models
+    BASE_STRATEGIES = [
+        ExtractionStrategy.COMPLETION_LAST,
+        ExtractionStrategy.COMPLETION_MEAN,
+        ExtractionStrategy.MC_COMPLETION,
+    ]
+    def __init__(
+        self,
+        models: Optional[List[str]] = None,
+        strategies: Optional[List[ExtractionStrategy]] = None,
+        benchmarks: Optional[List[str]] = None,
+        config: Optional[GeometrySearchConfig] = None,
+    ):
+        """
+        Initialize the search space.
+        Args:
+            models: List of model names to test. Defaults to DEFAULT_MODELS.
+            strategies: List of extraction strategies. Defaults to INSTRUCT_STRATEGIES.
+            benchmarks: List of benchmarks. Defaults to all available benchmarks.
+            config: Search configuration (pairs, caching, etc.)
+        """
+        self.models = models or self.DEFAULT_MODELS
+        self.strategies = strategies or self.INSTRUCT_STRATEGIES
+        self.benchmarks = benchmarks or get_all_benchmarks()
+        self.config = config or GeometrySearchConfig()
+    def get_layer_combinations_for_model(self, model_name: str, num_layers: int) -> List[List[int]]:
+        """
+        Get all layer combinations to test for a given model.
+        Args:
+            model_name: Name of the model
+            num_layers: Number of layers in the model
+        Returns:
+            List of layer combinations
+        """
+        return get_layer_combinations(num_layers, self.config.max_layer_combo_size)
+    def get_extraction_count(self) -> int:
+        """
+        Calculate number of activation extractions needed (with caching).
+        With caching, we extract ALL layers once per (benchmark, strategy).
+        Layer combinations are tested from cache without re-extraction.
+        Returns:
+            Number of (benchmark, strategy) pairs = extraction operations
+        """
+        return len(self.benchmarks) * len(self.strategies)
+    def get_total_configurations(self, num_layers: int) -> int:
+        """
+        Calculate total number of configurations to test.
+        Total = strategies * layer_combos * benchmarks
+        (Layer combos are tested from cached activations)
+        """
+        from wisent.core.utils.layer_combinations import get_layer_combinations_count
+        layer_combos = get_layer_combinations_count(num_layers, self.config.max_layer_combo_size)
+        return len(self.strategies) * layer_combos * len(self.benchmarks)
+    def estimate_time_hours(self) -> float:
+        """
+        Estimate total time for geometry search (per model).
+        With caching:
+        - Extract once per (benchmark, strategy)
+        - Layer combo testing is fast (from cache)
+        Returns:
+            Estimated hours per model
+        """
+        extractions = self.get_extraction_count()
+        seconds = extractions * self.config.estimated_time_per_extraction_seconds
+        return seconds / 3600
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize to dictionary."""
+        return {
+            "models": self.models,
+            "strategies": [s.value for s in self.strategies],
+            "benchmarks": self.benchmarks,
+            "config": self.config.to_dict(),
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GeometrySearchSpace":
+        """Deserialize from dictionary."""
+        strategies = [ExtractionStrategy(s) for s in data.get("strategies", [])]
+        config = GeometrySearchConfig.from_dict(data.get("config", {}))
+        return cls(
+            models=data.get("models"),
+            strategies=strategies if strategies else None,
+            benchmarks=data.get("benchmarks"),
+            config=config,
+        )
+    def summary(self) -> str:
+        """Return a human-readable summary of the search space."""
+        lines = [
+            "Geometry Search Space:",
+            f"  Models: {len(self.models)}",
+            f"  Strategies: {len(self.strategies)}",
+            f"  Benchmarks: {len(self.benchmarks)}",
+            f"  Pairs per benchmark: {self.config.pairs_per_benchmark}",
+            f"  Max layer combo size: {self.config.max_layer_combo_size}",
+            f"  Cache activations: {self.config.cache_activations}",
+            f"",
+            f"  Extractions needed (per model): {self.get_extraction_count()}",
+            f"  Estimated time (per model): {self.estimate_time_hours():.1f} hours",
+        ]
+        return "\n".join(lines)
+    def save(self, path: str) -> None:
+        """Save search space to JSON file."""
+        with open(path, "w") as f:
+            json.dump(self.to_dict(), f, indent=2)
+    @classmethod
+    def load(cls, path: str) -> "GeometrySearchSpace":
+        """Load search space from JSON file."""
+        with open(path) as f:
+            return cls.from_dict(json.load(f))
+# Default search space instance
+DEFAULT_SEARCH_SPACE = GeometrySearchSpace()
+if __name__ == "__main__":
+    # Print summary of default search space
+    space = GeometrySearchSpace()
+    print(space.summary())
+    print()
+    # Example with 16 layers (Llama-3.2-1B)
+    num_layers = 16
+    layer_combos = space.get_layer_combinations_for_model("test", num_layers)
+    print(f"For a {num_layers}-layer model:")
+    print(f"  Layer combinations: {len(layer_combos)}")
+    print(f"  Total configs to test: {space.get_total_configurations(num_layers)}")

wisent/core/hyperparameter_optimizer.py CHANGED Viewed

@@ -9,8 +9,8 @@ from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_sc
 from .contrastive_pairs import ContrastivePairSet
 from .steering import SteeringMethod, SteeringType
 from .activations.activations_collector import ActivationCollector
-from .activations.core.atoms import ActivationAggregationStrategy
-from .activations.prompt_construction_strategy import PromptConstructionStrategy
+from .activations.extraction_strategy import ExtractionStrategy
 from wisent.core.errors import OptimizationError, NoActivationDataError, InsufficientDataError
 logger = logging.getLogger(__name__)
@@ -352,25 +352,25 @@ class HyperparameterOptimizer:
         # Map aggregation string to enum
         aggregation_map = {
-            'average': ActivationAggregationStrategy.MEAN_POOLING,
-            'first': ActivationAggregationStrategy.FIRST_TOKEN,
-            'last': ActivationAggregationStrategy.LAST_TOKEN,
-            'max': ActivationAggregationStrategy.MAX_POOLING,
+            'average': ExtractionStrategy.CHAT_MEAN,
+            'first': ExtractionStrategy.CHAT_FIRST,
+            'last': ExtractionStrategy.CHAT_LAST,
+            'max': ExtractionStrategy.CHAT_MAX_NORM,
         }
-        agg_strategy = aggregation_map.get(aggregation, ActivationAggregationStrategy.MEAN_POOLING)
+        agg_strategy = aggregation_map.get(aggregation, ExtractionStrategy.CHAT_MEAN)
         # Map prompt strategy string to enum
         prompt_strategy_map = {
-            'multiple_choice': PromptConstructionStrategy.MULTIPLE_CHOICE,
-            'role_playing': PromptConstructionStrategy.ROLE_PLAYING,
-            'direct_completion': PromptConstructionStrategy.DIRECT_COMPLETION,
-            'instruction_following': PromptConstructionStrategy.INSTRUCTION_FOLLOWING,
-            'chat_template': PromptConstructionStrategy.CHAT_TEMPLATE,
+            'multiple_choice': ExtractionStrategy.MC_BALANCED,
+            'role_playing': ExtractionStrategy.ROLE_PLAY,
+            'direct_completion': ExtractionStrategy.CHAT_LAST,
+            'instruction_following': ExtractionStrategy.CHAT_LAST,
+            'chat_template': ExtractionStrategy.CHAT_LAST,
         }
-        prompt_strategy = prompt_strategy_map.get(prompt_construction_strategy, PromptConstructionStrategy.CHAT_TEMPLATE)
+        prompt_strategy = prompt_strategy_map.get(prompt_construction_strategy, ExtractionStrategy.CHAT_LAST)
         # Create activation collector
-        collector = ActivationCollector(model=model, store_device="cpu")
+        collector = ActivationCollector(model=model)
         layer_str = str(layer)
         # Collect activations for training pairs

wisent/core/lm_eval_harness_ground_truth.py CHANGED Viewed

@@ -10,7 +10,7 @@ Evaluation uses the TEST portion (20%) to ensure no data leakage with training.
 import logging
 from typing import Any, Dict
-from wisent.core.activations.core.atoms import ActivationAggregationStrategy
+from wisent.core.activations.extraction_strategy import ExtractionStrategy
 from wisent.core.activations.activations import Activations
 from wisent.core.layer import Layer
 from wisent.core.utils.dataset_splits import get_all_docs_from_task, create_deterministic_split
@@ -674,16 +674,12 @@ class LMEvalHarnessGroundTruth:
         }
     def _map_token_aggregation_to_activation_method(self, token_aggregation: str):
-        """Map token aggregation string to activation method."""
-        mapping = {  # TODO This should be refactor, why we use strings as Token aggregation?
-            "average": ActivationAggregationStrategy.MEAN_POOLING,
-            "mean": ActivationAggregationStrategy.MEAN_POOLING,
-            "last": ActivationAggregationStrategy.LAST_TOKEN,
-            "max": ActivationAggregationStrategy.MAX_POOLING,
-        }
-        return mapping.get(token_aggregation.lower(), ActivationAggregationStrategy.MEAN_POOLING)
+        """Map token aggregation string to ExtractionStrategy."""
+        # Use ExtractionStrategy directly - token_aggregation should already be a valid enum value
+        try:
+            return ExtractionStrategy(token_aggregation)
+        except ValueError:
+            return ExtractionStrategy.CHAT_LAST
     def _is_task_interface_task(self, task_name: str) -> bool:
         """Check if this is a TaskInterface task (not an lm-eval task)."""

wisent/core/main.py CHANGED Viewed

@@ -12,6 +12,8 @@ from wisent.core.branding import print_banner
 from wisent.core.cli import execute_tasks, execute_generate_pairs_from_task, execute_generate_pairs, execute_diagnose_pairs, execute_get_activations, execute_diagnose_vectors, execute_create_steering_vector, execute_generate_vector_from_task, execute_generate_vector_from_synthetic, execute_optimize_classification, execute_optimize_steering, execute_optimize_sample_size, execute_generate_responses, execute_evaluate_responses, execute_multi_steer, execute_agent, execute_modify_weights, execute_evaluate_refusal, execute_inference_config, execute_optimization_cache, execute_optimize_weights, execute_optimize
 from wisent.core.cli.train_unified_goodness import execute_train_unified_goodness
 from wisent.core.cli.check_linearity import execute_check_linearity
+from wisent.core.cli.cluster_benchmarks import execute_cluster_benchmarks
+from wisent.core.cli.geometry_search import execute_geometry_search
 def _should_show_banner() -> bool:
@@ -92,6 +94,10 @@ def main():
         execute_train_unified_goodness(args)
     elif args.command == 'check-linearity':
         execute_check_linearity(args)
+    elif args.command == 'cluster-benchmarks':
+        execute_cluster_benchmarks(args)
+    elif args.command == 'geometry-search':
+        execute_geometry_search(args)
     else:
         print(f"\n✗ Command '{args.command}' is not yet implemented")
         sys.exit(1)

wisent/core/models/core/atoms.py CHANGED Viewed

@@ -7,6 +7,7 @@ import torch
 from typing import Mapping
 from wisent.core.errors import InvalidValueError, InvalidRangeError
+from wisent.core.utils.device import preferred_dtype
 if TYPE_CHECKING:
     from wisent.core.activations.core.atoms import RawActivationMap
@@ -213,12 +214,13 @@ class SteeringPlan:
         """
         if n < 0:
             raise InvalidRangeError(param_name="n", actual=n, min_val=0)
+        dtype = preferred_dtype()
         if n == 0:
-            return torch.empty(0, dtype=torch.float32)
+            return torch.empty(0, dtype=dtype)
         if weights is None:
-            return torch.full((n,), 1.0 / n, dtype=torch.float32)
+            return torch.full((n,), 1.0 / n, dtype=dtype)
-        w = torch.as_tensor(weights, dtype=torch.float32)
+        w = torch.as_tensor(weights, dtype=dtype)
         if w.numel() != n:
             raise InvalidValueError(param_name="weights length", actual=w.numel(), expected=f"{n} (number of activation maps)")
         s = float(w.sum())

wisent/core/models/wisent_model.py CHANGED Viewed

@@ -89,7 +89,7 @@ class WisentModel:
                 optional preloaded model (skips from_pretrained if provided).
         """
         self.model_name = model_name
-        self.device = device or resolve_default_device()
+        self.device = resolve_default_device() if device is None or device == "auto" else device
         # Determine appropriate dtype and settings for the device
         load_kwargs = {
@@ -154,12 +154,13 @@ class WisentModel:
         layers: list[nn.Module] = []
         candidates = [
-            "layers",
-            "model.layers",
-            "model.decoder.layers",
-            "transformer.h",
+            "layers",
+            "model.layers",
+            "model.decoder.layers",
+            "transformer.h",
             "base_model.model.layers",
-            "blocks", "model.blocks",
+            "blocks", "model.blocks",
+            "gpt_neox.layers",  # Pythia models
         ]
         for path in candidates:
             obj = m
@@ -516,7 +517,7 @@ class WisentModel:
                 return_tensors="pt",
                 padding=False,  # Single prompt, no padding needed
                 truncation=True,  # Avoid errors on long inputs
-                max_length=self.tokenizer.model_max_length  # Use model's actual limit
+                max_length=self.tokenizer.model_max_length,  # Use model's actual limit
             )
             # Move tensors to the correct device (same as _batch_encode does)
             batch = {
@@ -792,7 +793,7 @@ class WisentModel:
                 return_tensors="pt",
                 padding=False,  # Single prompt, no padding needed
                 truncation=True,  # Avoid errors on long inputs
-                max_length=self.tokenizer.model_max_length  # Use model's actual limit
+                max_length=self.tokenizer.model_max_length,  # Use model's actual limit
             )
             # Move tensors to the correct device (same as _batch_encode does)
             batch = {

wisent/core/opti/methods/opti_weights.py CHANGED Viewed

@@ -240,11 +240,14 @@ class WeightsOptimizer(BaseOptimizer):
                 norm_preserve=self.config.norm_preserve,
                 verbose=False,
             )
-        elif self.config.method == "additive":
+        elif self.config.method in ("additive", "titan", "prism", "pulse"):
             # Direct additive: add steering vector directly to weight matrices
-            # This is the simplest approach that worked in manual tests
+            # This modifies weights directly, not biases, so it persists when saved
+            # Used for additive and multi-direction methods (titan/prism/pulse)
             self._apply_direct_additive(params)
         else:
+            # Default fallback - use bake_steering_with_kernel
+            # Note: This adds biases which may not load correctly for some architectures
             bake_steering_with_kernel(
                 self.model,
                 self.steering_vectors,
@@ -376,6 +379,8 @@ class WeightsOptimizer(BaseOptimizer):
         checkpoint_interval: int = 5,
         output_dir: str | None = None,
         tokenizer: Any = None,
+        s3_bucket: str | None = None,
+        s3_key_prefix: str | None = None,
     ) -> HPORun:
         """
         Run optimization with checkpointing support.
@@ -436,11 +441,20 @@ class WeightsOptimizer(BaseOptimizer):
             if checkpoint_path and trial_num % checkpoint_interval == 0:
                 self._save_checkpoint(study, checkpoint_path)
                 print(f"   [Checkpoint saved at trial {trial_num}]")
+                # Upload checkpoint to S3
+                if s3_bucket and s3_key_prefix:
+                    self._upload_to_s3(checkpoint_path, s3_bucket, f"{s3_key_prefix}/checkpoint.json")
             # Save best model at intervals
             if output_dir and trial_num % checkpoint_interval == 0:
                 if study.best_trial is not None:
                     self._save_best_model_checkpoint(study, output_dir, tokenizer)
+                    # Upload best model checkpoint to S3
+                    if s3_bucket and s3_key_prefix:
+                        checkpoint_dir = os.path.join(output_dir, "checkpoint_best")
+                        self._upload_to_s3(checkpoint_dir, s3_bucket, f"{s3_key_prefix}/checkpoint_best/")
         # Run optimization with callback
         study.optimize(
@@ -521,3 +535,16 @@ class WeightsOptimizer(BaseOptimizer):
         }
         with open(os.path.join(checkpoint_dir, "checkpoint_metadata.json"), "w") as f:
             json.dump(metadata, f, indent=2)
+    def _upload_to_s3(self, local_path: str, s3_bucket: str, s3_key: str) -> bool:
+        """Upload a file or directory to S3."""
+        import subprocess
+        try:
+            if os.path.isdir(local_path):
+                cmd = ["aws", "s3", "sync", local_path, f"s3://{s3_bucket}/{s3_key}", "--quiet"]
+            else:
+                cmd = ["aws", "s3", "cp", local_path, f"s3://{s3_bucket}/{s3_key}", "--quiet"]
+            subprocess.run(cmd, check=True, capture_output=True)
+            return True
+        except Exception:
+            return False

wisent/core/optuna/classifier/activation_generator.py CHANGED Viewed

@@ -16,7 +16,7 @@ import numpy as np
 import torch
 from wisent.core.activations.activations_collector import ActivationCollector
-from wisent.core.activations.core.atoms import ActivationAggregationStrategy
+from wisent.core.activations.extraction_strategy import ExtractionStrategy
 from wisent.core.activations.activations import Activations
 logger = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ class ActivationData:
     activations: torch.Tensor
     labels: torch.Tensor
     layer: int
-    aggregation: ActivationAggregationStrategy
+    aggregation: ExtractionStrategy
     metadata: dict[str, Any]
     def to_numpy(self) -> tuple[np.ndarray, np.ndarray]:
@@ -102,7 +102,7 @@ class GenerationConfig:
     """Configuration for activation generation."""
     layer_search_range: tuple[int, int]
-    aggregation_methods: Optional[list[ActivationAggregationStrategy]] = None
+    aggregation_methods: Optional[list[ExtractionStrategy]] = None
     cache_dir: Optional[str] = None
     device: Optional[str] = None
     dtype: Optional[torch.dtype] = None  # Auto-detect if None
@@ -113,10 +113,10 @@ class GenerationConfig:
             self.cache_dir = "./activation_cache"
         if not self.aggregation_methods:
             self.aggregation_methods = [
-                ActivationAggregationStrategy.MEAN_POOLING,
-                ActivationAggregationStrategy.LAST_TOKEN,
-                ActivationAggregationStrategy.FIRST_TOKEN,
-                ActivationAggregationStrategy.MAX_POOLING,
+                ExtractionStrategy.CHAT_MEAN,
+                ExtractionStrategy.CHAT_LAST,
+                ExtractionStrategy.CHAT_FIRST,
+                ExtractionStrategy.CHAT_MAX_NORM,
             ]
@@ -239,7 +239,7 @@ class ActivationGenerator:
         return activation_data
     def _apply_batch_aggregation(
-        self, activations: torch.Tensor, strategy: ActivationAggregationStrategy
+        self, activations: torch.Tensor, strategy: ExtractionStrategy
     ) -> torch.Tensor:
         """
         Apply aggregation strategy to a batch of activations efficiently.
@@ -258,14 +258,16 @@ class ActivationGenerator:
             return activations
         if len(activations.shape) == 3:
             # [n_samples, n_tokens, hidden_dim] -> [n_samples, hidden_dim]
-            if strategy == ActivationAggregationStrategy.MEAN_POOLING:
+            if strategy == ExtractionStrategy.CHAT_MEAN:
                 return torch.mean(activations, dim=1)
-            if strategy == ActivationAggregationStrategy.LAST_TOKEN:
+            if strategy == ExtractionStrategy.CHAT_LAST:
                 return activations[:, -1, :]
-            if strategy == ActivationAggregationStrategy.FIRST_TOKEN:
+            if strategy == ExtractionStrategy.CHAT_FIRST:
                 return activations[:, 0, :]
-            if strategy == ActivationAggregationStrategy.MAX_POOLING:
+            if strategy == ExtractionStrategy.CHAT_MAX_NORM:
                 return torch.max(activations, dim=1)[0]
+            if strategy == ExtractionStrategy.CHAT_MEAN:
+                return torch.min(activations, dim=1)[0]
             # Default to mean pooling
             self.logger.warning(f"Unknown aggregation strategy {strategy}, using mean pooling")
             return torch.mean(activations, dim=1)

wisent/core/optuna/classifier/optuna_classifier_optimizer.py CHANGED Viewed

@@ -17,7 +17,7 @@ from optuna.pruners import MedianPruner
 from optuna.samplers import TPESampler
 from wisent.core.classifier.classifier import Classifier
-from wisent.core.utils.device import resolve_default_device
+from wisent.core.utils.device import resolve_default_device, preferred_dtype
 from wisent.core.errors import NoActivationDataError, ClassifierCreationError
 from .activation_generator import ActivationData, ActivationGenerator, GenerationConfig
@@ -44,7 +44,7 @@ def get_model_dtype(model) -> torch.dtype:
         return next(model_params).dtype
     except StopIteration:
         # Fallback if no parameters found
-        return torch.float32
+        return preferred_dtype()
 logger = logging.getLogger(__name__)

wisent/core/optuna/steering/steering_optimization.py CHANGED Viewed

@@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional, Tuple
 import torch
 from tqdm import tqdm
-from wisent.core.activations.core import ActivationAggregationStrategy
+from wisent.core.activations import ExtractionStrategy
 from wisent.core.classifier.classifier import Classifier
 from wisent.core.contrastive_pairs.contrastive_pair import ContrastivePair
 from wisent.core.contrastive_pairs.contrastive_pair_set import ContrastivePairSet
@@ -743,24 +743,29 @@ class SteeringOptimizer:
         # Apply aggregation strategy
         if (
             aggregation_strategy == "mean_pooling"
-            or aggregation_strategy == ActivationAggregationStrategy.MEAN_POOLING.value
+            or aggregation_strategy == ExtractionStrategy.CHAT_MEAN.value
         ):
             aggregated = torch.mean(activation_tensor, dim=1)  # [1, hidden_dim]
         elif (
             aggregation_strategy == "last_token"
-            or aggregation_strategy == ActivationAggregationStrategy.LAST_TOKEN.value
+            or aggregation_strategy == ExtractionStrategy.CHAT_LAST.value
         ):
             aggregated = activation_tensor[:, -1, :]  # [1, hidden_dim]
         elif (
             aggregation_strategy == "first_token"
-            or aggregation_strategy == ActivationAggregationStrategy.FIRST_TOKEN.value
+            or aggregation_strategy == ExtractionStrategy.CHAT_FIRST.value
         ):
             aggregated = activation_tensor[:, 0, :]  # [1, hidden_dim]
         elif (
             aggregation_strategy == "max_pooling"
-            or aggregation_strategy == ActivationAggregationStrategy.MAX_POOLING.value
+            or aggregation_strategy == ExtractionStrategy.CHAT_MAX_NORM.value
         ):
             aggregated = torch.max(activation_tensor, dim=1)[0]  # [1, hidden_dim]
+        elif (
+            aggregation_strategy == "min_pooling"
+            or aggregation_strategy == ExtractionStrategy.CHAT_MEAN.value
+        ):
+            aggregated = torch.min(activation_tensor, dim=1)[0]  # [1, hidden_dim]
         else:
             # Default to mean pooling if unknown
             self.logger.warning(f"Unknown aggregation strategy {aggregation_strategy}, using mean pooling")
@@ -1029,10 +1034,10 @@ class SteeringOptimizer:
             generation_config = GenerationConfig(
                 layer_search_range=(0, 23),  # Will be auto-detected from model
                 aggregation_methods=[
-                    ActivationAggregationStrategy.MEAN_POOLING,
-                    ActivationAggregationStrategy.LAST_TOKEN,
-                    ActivationAggregationStrategy.FIRST_TOKEN,
-                    ActivationAggregationStrategy.MAX_POOLING,
+                    ExtractionStrategy.CHAT_MEAN,
+                    ExtractionStrategy.CHAT_LAST,
+                    ExtractionStrategy.CHAT_FIRST,
+                    ExtractionStrategy.CHAT_MAX_NORM,
                 ],
                 cache_dir="./cache/steering_activations",
                 device=optimization_config.device,

wisent/core/parser_arguments/check_linearity_parser.py CHANGED Viewed

@@ -1,5 +1,7 @@
 """Parser for check-linearity command."""
+from wisent.core.activations.extraction_strategy import ExtractionStrategy
 def setup_check_linearity_parser(parser):
     """Set up the check-linearity command parser."""
@@ -9,6 +11,14 @@ def setup_check_linearity_parser(parser):
         help='Path to JSON file containing contrastive pairs'
     )
+    parser.add_argument(
+        '--extraction-strategy',
+        type=str,
+        default=None,
+        choices=ExtractionStrategy.list_all(),
+        help=f'Extraction strategy to use. If not specified, tests multiple strategies. Options: {", ".join(ExtractionStrategy.list_all())}'
+    )
     parser.add_argument(
         '--model',
         type=str,
@@ -19,8 +29,8 @@ def setup_check_linearity_parser(parser):
     parser.add_argument(
         '--device',
         type=str,
-        default='cuda',
-        help='Device to run model on (cuda, mps, cpu)'
+        default='auto',
+        help='Device to run model on (auto, cuda, mps, cpu)'
     )
     parser.add_argument(

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl