wisent 0.5.13__py3-none-any.whl → 0.5.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic.
- wisent/__init__.py +1 -1
- wisent/cli.py +114 -0
- wisent/core/activations/activations_collector.py +19 -11
- wisent/core/agent/__init__.py +1 -18
- wisent/core/agent/diagnose/__init__.py +1 -55
- wisent/core/cli/__init__.py +3 -1
- wisent/core/cli/create_steering_vector.py +60 -18
- wisent/core/cli/evaluate_responses.py +14 -8
- wisent/core/cli/generate_pairs_from_task.py +18 -5
- wisent/core/cli/get_activations.py +1 -1
- wisent/core/cli/multi_steer.py +108 -0
- wisent/core/cli/optimize_classification.py +187 -285
- wisent/core/cli/optimize_sample_size.py +78 -0
- wisent/core/cli/optimize_steering.py +354 -53
- wisent/core/cli/tasks.py +274 -9
- wisent/core/errors/__init__.py +0 -0
- wisent/core/errors/error_handler.py +134 -0
- wisent/core/evaluators/benchmark_specific/log_likelihoods_evaluator.py +152 -295
- wisent/core/evaluators/rotator.py +22 -8
- wisent/core/main.py +5 -1
- wisent/core/model_persistence.py +4 -19
- wisent/core/models/wisent_model.py +11 -3
- wisent/core/parser.py +4 -3
- wisent/core/parser_arguments/main_parser.py +1 -1
- wisent/core/parser_arguments/multi_steer_parser.py +4 -3
- wisent/core/parser_arguments/optimize_steering_parser.py +4 -0
- wisent/core/sample_size_optimizer_v2.py +1 -1
- wisent/core/steering_optimizer.py +2 -2
- wisent/tests/__init__.py +0 -0
- wisent/tests/examples/__init__.py +0 -0
- wisent/tests/examples/cli/__init__.py +0 -0
- wisent/tests/examples/cli/activations/__init__.py +0 -0
- wisent/tests/examples/cli/activations/test_get_activations.py +127 -0
- wisent/tests/examples/cli/classifier/__init__.py +0 -0
- wisent/tests/examples/cli/classifier/test_classifier_examples.py +141 -0
- wisent/tests/examples/cli/contrastive_pairs/__init__.py +0 -0
- wisent/tests/examples/cli/contrastive_pairs/test_generate_pairs.py +89 -0
- wisent/tests/examples/cli/evaluation/__init__.py +0 -0
- wisent/tests/examples/cli/evaluation/test_evaluation_examples.py +117 -0
- wisent/tests/examples/cli/generate/__init__.py +0 -0
- wisent/tests/examples/cli/generate/test_generate_with_classifier.py +146 -0
- wisent/tests/examples/cli/generate/test_generate_with_steering.py +149 -0
- wisent/tests/examples/cli/generate/test_only_generate.py +110 -0
- wisent/tests/examples/cli/multi_steering/__init__.py +0 -0
- wisent/tests/examples/cli/multi_steering/test_multi_steer_from_trained_vectors.py +210 -0
- wisent/tests/examples/cli/multi_steering/test_multi_steer_with_different_parameters.py +205 -0
- wisent/tests/examples/cli/multi_steering/test_train_and_multi_steer.py +174 -0
- wisent/tests/examples/cli/optimizer/__init__.py +0 -0
- wisent/tests/examples/cli/optimizer/test_optimize_sample_size.py +102 -0
- wisent/tests/examples/cli/optimizer/test_optimizer_examples.py +59 -0
- wisent/tests/examples/cli/steering/__init__.py +0 -0
- wisent/tests/examples/cli/steering/test_create_steering_vectors.py +135 -0
- wisent/tests/examples/cli/synthetic/__init__.py +0 -0
- wisent/tests/examples/cli/synthetic/test_synthetic_pairs.py +45 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/METADATA +3 -1
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/RECORD +61 -31
- wisent/core/agent/diagnose/test_synthetic_classifier.py +0 -71
- /wisent/core/parser_arguments/{test_nonsense_parser.py → nonsense_parser.py} +0 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/WHEEL +0 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/entry_points.txt +0 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/licenses/LICENSE +0 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/top_level.txt +0 -0
wisent/core/evaluators/benchmark_specific/log_likelihoods_evaluator.py
CHANGED
@@ -1,329 +1,186 @@
-"""
-Log-Likelihoods Ground Truth Evaluator
+"""Log Likelihoods Evaluator for multiple choice tasks.
 
-This
-
-
-directly on each choice to evaluate performance against known ground truth.
+This evaluator handles tasks like BoolQ, MMLU, ARC where evaluation is done
+by comparing log likelihoods of different answer choices rather than generating text.
+Works with steering by computing log probabilities with steering applied.
 """
 
 import logging
-
-from
+import torch
+from typing import Any, List
 
-from wisent.core.
-from wisent.core.
+from wisent.core.evaluators.core.atoms import BaseEvaluator, EvalResult
+from wisent.core.errors.error_handler import (
+    ModelNotProvidedError,
+    validate_choices,
+    require_all_parameters
+)
 
 logger = logging.getLogger(__name__)
 
 
-
-
-    """Simple layer metadata class."""
-    index: int
-    type: str = "transformer"
+class LogLikelihoodsEvaluator(BaseEvaluator):
+    """Evaluator for multiple choice tasks using log likelihood comparison.
 
+    Compatible with:
+    - BoolQ: Boolean questions with yes/no choices
+    - MMLU: Multiple choice questions
+    - ARC: Science questions with multiple choices
+    - Any task requiring log likelihood comparison
 
-
+    This evaluator computes the log likelihood of each choice and selects
+    the one with the highest probability. Can apply steering before computing
+    log likelihoods.
     """
-    Evaluator for log-likelihoods based ground truth assessment.
 
-
-
-
-    """
+    name = "log_likelihoods"
+    description = "Log likelihood evaluator for multiple choice tasks"
+    task_names = ("boolq", "mmlu", "arc_easy", "arc_challenge", "truthfulqa_mc1", "truthfulqa_mc2")
 
-    def __init__(self,
-        """
-        Initialize the log-likelihoods evaluator.
+    def __init__(self, model=None):
+        """Initialize with optional model for log likelihood computation.
 
         Args:
-
-            model: The model instance used to extract activations
+            model: WisentModel instance that can compute log likelihoods
         """
-        self.task_name = task_name
         self.model = model
 
-    def
-
-        classifier,
-        task_name: str,
-        num_samples: int = 100,
-        model=None,
-        layer: int = 15,
-        token_aggregation: str = "average",
-    ) -> Dict[str, Any]:
-        """
-        Evaluate a classifier on a log-likelihoods task by running it on multiple choice options.
+    def evaluate(self, response: str, expected: Any, **kwargs) -> EvalResult:
+        """Evaluate using log likelihood comparison of choices.
 
         Args:
-
-
-
-
-
-
+            response: Not used for log likelihood evaluation
+            expected: Expected answer
+            **kwargs:
+                model: WisentModel instance (REQUIRED)
+                question: The question/context (REQUIRED)
+                choices: List of answer choices (REQUIRED)
+                steering_plan: Optional steering plan to apply
 
         Returns:
-
+            EvalResult with TRUTHFUL/UNTRUTHFUL
+
+        Raises:
+            ModelNotProvidedError: If model is not provided
+            MissingParameterError: If question is not provided
+            InvalidChoicesError: If choices are invalid or missing
         """
+        model = kwargs.get('model') or self.model
+        question = kwargs.get('question')
+        choices = kwargs.get('choices')
+        steering_plan = kwargs.get('steering_plan')
+        task_name = kwargs.get('task_name', 'unknown')
+
+        # NO FALLBACKS - require all parameters
+        if not model:
+            raise ModelNotProvidedError(evaluator_name=self.name, task_name=task_name)
+
+        require_all_parameters(
+            {'question': question},
+            context=f"{self.name} evaluator",
+            task_name=task_name
+        )
+
+        validate_choices(choices, task_name=task_name, min_choices=2)
+
+        return self._evaluate_log_likelihood(
+            model, question, choices, expected, steering_plan
+        )
+
+    def _evaluate_log_likelihood(
+        self, model, question: str, choices: List[str], expected: Any, steering_plan=None
+    ) -> EvalResult:
+        """Evaluate by comparing log likelihoods of choices."""
         try:
-            #
-
-
-
-
-
-
-
-
-
-
-            if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # For evaluation, use DIRECT_COMPLETION instead of MULTIPLE_CHOICE
-            # This creates prompts like "Q" -> "good_resp"/"bad_resp" instead of "Which is better: Q A. bad B. good"
-            logger.info("🔍 EVALUATION MODE: Using DIRECT_COMPLETION prompt strategy instead of MULTIPLE_CHOICE")
-            contrastive_pairs = collector.create_batch_contrastive_pairs(
-                qa_pairs, prompt_strategy=PromptConstructionStrategy.DIRECT_COMPLETION
-            )
-
-            if not contrastive_pairs:
-                return self._error_result("No contrastive pairs could be created from QA pairs")
-
-            logger.info(f"Created {len(contrastive_pairs)} contrastive pairs")
-
-            # Map token aggregation to token targeting strategy for evaluation
-            targeting_strategy_mapping = {  # TODO Refactor - we should stay with one standard
-                "average": ActivationAggregationStrategy.MEAN_POOLING,
-                "final": ActivationAggregationStrategy.LAST_TOKEN,
-                "first": ActivationAggregationStrategy.FIRST_TOKEN,
-                "max": ActivationAggregationStrategy.MAX_POOLING,
-                "min": ActivationAggregationStrategy.MEAN_POOLING,  # Fallback to mean
-            }
-
-            targeting_strategy = targeting_strategy_mapping.get(
-                token_aggregation, ActivationAggregationStrategy.MEAN_POOLING
-            )
-
-            logger.info(
-                f"🔍 EVALUATION MODE: Using {targeting_strategy.value} targeting strategy (from token_aggregation: {token_aggregation})"
-            )
-            logger.info("🎯 ACTIVATION COLLECTION PARAMS:")
-            logger.info(f" • Layer: {layer}")
-            logger.info(f" • Device: {evaluation_model.device}")
-            logger.info(f" • Token targeting: {targeting_strategy.value}")
-            logger.info(f" • Pairs count: {len(contrastive_pairs)}")
-
-            processed_pairs = collector.collect_activations_batch(
-                pairs=contrastive_pairs,
-                layer_index=layer,
-                device=evaluation_model.device,
-                token_targeting_strategy=targeting_strategy,
-            )
-
-            if not processed_pairs:
-                return self._error_result("No activations could be extracted from contrastive pairs")
-
-            logger.info(f"Extracted activations from {len(processed_pairs)} pairs")
-
-            # Debug: Show where activations are collected from
-            if processed_pairs:
-                sample_pair = processed_pairs[0]
-                logger.info("📍 DETAILED ACTIVATION COLLECTION ANALYSIS:")
-                logger.info(f" 🔧 Sample pair type: {type(sample_pair).__name__}")
-                logger.info(
-                    f" 🔧 Pair attributes: {[attr for attr in dir(sample_pair) if not attr.startswith('_')][:8]}..."
-                )
-
-                if hasattr(sample_pair, "positive_activations") and sample_pair.positive_activations is not None:
-                    logger.info(f" ✅ Positive activations shape: {sample_pair.positive_activations.shape}")
-                if hasattr(sample_pair, "negative_activations") and sample_pair.negative_activations is not None:
-                    logger.info(f" ✅ Negative activations shape: {sample_pair.negative_activations.shape}")
-
-                if hasattr(sample_pair, "_prompt_pair") and sample_pair._prompt_pair:
-                    logger.debug(f" 🔸 Positive prompt: {sample_pair._prompt_pair.positive_prompt[:100]}...")
-                    logger.debug(f" 🔸 Negative prompt: {sample_pair._prompt_pair.negative_prompt[:100]}...")
-                    logger.debug(f" 🎯 Target token: {sample_pair._prompt_pair.target_token}")
-                    logger.debug(f" 📊 Prompt strategy: {sample_pair._prompt_strategy.value}")
-                    logger.info(f" 🔍 Token targeting: {targeting_strategy.value} (evaluation mode)")
-                elif hasattr(sample_pair, "prompt") and hasattr(sample_pair, "positive_response"):
-                    logger.debug(f" 🔸 Question prompt: {sample_pair.prompt[:100]}...")
-                    logger.debug(f" ✅ Positive response: {sample_pair.positive_response[:50]}...")
-                    logger.debug(f" ❌ Negative response: {sample_pair.negative_response[:50]}...")
-                    logger.debug(
-                        f" 🔍 Token targeting used: {targeting_strategy.value} (from CLI token_aggregation: {token_aggregation})"
-                    )
-                else:
-                    logger.info(" 📍 ACTIVATION COLLECTION: Unknown format - investigating...")
-                    logger.info(
-                        f" 🔧 All attributes: {[attr for attr in dir(sample_pair) if not attr.startswith('__')]}"
-                    )
-
-            # Map token aggregation to activation method
-            activation_method = token_aggregation
-            # Handle both string and enum types
-            method_name = activation_method.value if hasattr(activation_method, 'value') else str(activation_method)
-            logger.info(
-                f"🎯 Using activation aggregation method: {method_name} (from token_aggregation: {token_aggregation})"
+            # Apply steering if provided
+            if steering_plan:
+                model.attach(steering_plan)
+
+            # Compute log likelihood for each choice
+            log_probs = []
+            for choice in choices:
+                log_prob = self._compute_choice_log_likelihood(model, question, choice)
+                log_probs.append(log_prob)
+
+            # Detach steering
+            if steering_plan:
+                model.detach()
+
+            # Select choice with highest log likelihood
+            predicted_idx = log_probs.index(max(log_probs))
+            predicted_choice = choices[predicted_idx]
+
+            # Normalize expected answer for comparison
+            expected_normalized = str(expected).strip().lower()
+            predicted_normalized = predicted_choice.strip().lower()
+
+            is_correct = predicted_normalized == expected_normalized
+
+            return EvalResult(
+                ground_truth="TRUTHFUL" if is_correct else "UNTRUTHFUL",
+                method_used=self.name,
+                confidence=1.0 if is_correct else 0.0,
+                details=f"Predicted: '{predicted_choice}' (log_prob={log_probs[predicted_idx]:.3f}), Expected: '{expected}'",
+                meta={
+                    "predicted": predicted_choice,
+                    "expected": expected,
+                    "log_probs": {choice: lp for choice, lp in zip(choices, log_probs)},
+                }
             )
 
-            # Evaluate classifier on each sample
-            results = []
-            total_correct = 0
-            total_samples = 0
-
-            for i, pair in enumerate(processed_pairs):
-                try:
-                    sample_result = self._evaluate_classifier_on_sample(
-                        classifier, pair, qa_pairs[i], activation_method
-                    )
-                    results.append(sample_result)
-
-                    if sample_result.get("classifier_correct", False):
-                        total_correct += 1
-                    total_samples += 1
-
-                except Exception as e:
-                    logger.error(f"Error evaluating sample {i}: {e}")
-                    continue
-
-            # Calculate overall metrics
-            accuracy = total_correct / total_samples if total_samples > 0 else 0.0
-
-            return {
-                "ground_truth": "EVALUATED",
-                "method_used": "log-likelihoods-classifier",
-                "confidence": accuracy,
-                "details": f"Evaluated {total_samples} samples with {total_correct} correct predictions",
-                "task_name": task_name,
-                "evaluation_method": "log-likelihoods",
-                "lm_eval_metrics": {
-                    "accuracy": accuracy,
-                    "correct_predictions": total_correct,
-                    "total_samples": total_samples,
-                },
-                "sample_results": results[:10],  # First 10 for debugging
-            }
-
         except Exception as e:
+            logger.error(f"Error in log likelihood evaluation: {e}")
             import traceback
+            logger.error(traceback.format_exc())
+            # NO FALLBACK - raise the error
+            raise
 
-
-
-            return self._error_result(f"Evaluation error: {e!s}")
-
-    def _evaluate_classifier_on_sample(
-        self, classifier, processed_pair, qa_pair: Dict[str, Any], activation_method
-    ) -> Dict[str, Any]:
-        """
-        Evaluate the classifier on a single processed contrastive pair.
+    def _compute_choice_log_likelihood(self, model, question: str, choice: str) -> float:
+        """Compute log likelihood of a choice given a question.
 
         Args:
-
-
-
-
+            model: WisentModel instance
+            question: The question/context
+            choice: The answer choice
+
         Returns:
-
+            Log likelihood (higher = more likely)
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            #
-
-
-
-
-            # Enhanced logging to show classifier vs normal predictions
-            logger.info("📋 Sample Evaluation:")
-            logger.debug(f" 🔸 Question: {qa_pair['question'][:100]}...")
-            logger.info(" 🧠 CLASSIFIER PREDICTION:")
-            logger.debug(f" ✅ Correct answer score: {float(positive_prediction):.3f} (lower = more truthful)")
-            logger.debug(f" ❌ Incorrect answer score: {float(negative_prediction):.3f} (higher = less truthful)")
-            logger.debug(
-                f" 🎯 Classifier judgment: {'CORRECT' if classifier_correct else 'INCORRECT'} (positive {'<' if classifier_correct else '>='} negative)"
-            )
-
-            return {
-                "question": qa_pair["question"],
-                "correct_answer": qa_pair["correct_answer"],
-                "incorrect_answer": qa_pair["incorrect_answer"],
-                "positive_prediction": float(positive_prediction),
-                "negative_prediction": float(negative_prediction),
-                "classifier_correct": classifier_correct,
-            }
-
-        except Exception as e:
-            logger.debug(f"Error evaluating sample: {e}")
-            return {
-                "question": qa_pair.get("question", "Unknown"),
-                "correct_answer": qa_pair.get("correct_answer", "Unknown"),
-                "incorrect_answer": qa_pair.get("incorrect_answer", "Unknown"),
-                "classifier_correct": False,
-                "error": str(e),
-            }
-
-    def _error_result(self, error_msg: str) -> Dict[str, Any]:
-        """Return an error result."""
-        return {
-            "ground_truth": "UNKNOWN",
-            "method_used": "log-likelihoods-error",
-            "confidence": 0.0,
-            "details": error_msg,
-            "task_name": self.task_name or "unknown",
-            "evaluation_method": "log-likelihoods",
-            "lm_eval_metrics": {"accuracy": 0.0, "correct_predictions": 0, "total_samples": 0},
-        }
+        # Format as: question + choice
+        full_text = f"{question}\n{choice}"
+
+        # Tokenize question and choice separately
+        question_inputs = model.tokenizer(question, return_tensors="pt", add_special_tokens=True).to(model.device)
+        choice_tokens = model.tokenizer(choice, return_tensors="pt", add_special_tokens=False).to(model.device)
+
+        # Get model logits for the full sequence
+        with torch.no_grad():
+            # Tokenize full sequence
+            full_inputs = model.tokenizer(full_text, return_tensors="pt", add_special_tokens=True).to(model.device)
+            outputs = model.hf_model(**full_inputs)
+            logits = outputs.logits
+
+        # Compute log probability of the choice tokens
+        # logits shape: [batch, seq_len, vocab_size]
+        # We want log prob of choice tokens given question
+
+        question_len = question_inputs.input_ids.shape[1]
+        choice_len = choice_tokens.input_ids.shape[1]
+
+        # Get logits at positions where we're predicting choice tokens
+        log_prob = 0.0
+        for i in range(choice_len):
+            # Position in full sequence where we predict token i of choice
+            # Subtract 1 because we predict the next token
+            pos = question_len + i - 1
+            if pos >= 0 and pos < logits.shape[1]:
+                token_logits = logits[0, pos, :]  # Logits at this position
+                token_log_probs = torch.nn.functional.log_softmax(token_logits, dim=-1)
+                # Get log prob of the actual choice token at this position
+                actual_token_id = choice_tokens.input_ids[0, i]
+                log_prob += token_log_probs[actual_token_id].item()
+
+        # Normalize by length to avoid bias toward shorter choices
+        normalized_log_prob = log_prob / max(choice_len, 1)
+
+        return normalized_log_prob
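For context, a minimal usage sketch of the rewritten evaluator. The class name, the evaluate() signature, and the model/question/choices/task_name keyword arguments all come from the hunk above; how a WisentModel is constructed is not shown in this diff, so that part (and the example question) is an assumption.

# Hypothetical usage sketch; assumes `wisent_model` is an already-loaded WisentModel
# exposing .tokenizer, .hf_model and .device as referenced in the new code above.
from wisent.core.evaluators.benchmark_specific.log_likelihoods_evaluator import LogLikelihoodsEvaluator

evaluator = LogLikelihoodsEvaluator()
result = evaluator.evaluate(
    response="",                 # unused by this evaluator
    expected="yes",
    model=wisent_model,          # loaded WisentModel instance (construction not shown here)
    question="Is water wet?",
    choices=["yes", "no"],
    task_name="boolq",
)
print(result.ground_truth)       # "TRUTHFUL" if the highest-likelihood choice matches expected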
wisent/core/evaluators/rotator.py
CHANGED
@@ -25,8 +25,8 @@ class EvaluatorRotator:
     ) -> None:
         if autoload:
             self.discover_evaluators(evaluators_location)
+        self._task_name = task_name  # Set before resolving
         self._evaluator = self._resolve_evaluator(evaluator)
-        self._task_name = task_name
 
     @staticmethod
     def discover_evaluators(location: Union[str, Path] = "wisent.core.evaluators.oracles") -> None:
@@ -93,17 +93,31 @@ class EvaluatorRotator:
         )
         return sorted(out, key=lambda x: x["name"])
 
-    @staticmethod
     def _resolve_evaluator(
+        self,
         evaluator: Union[str, BaseEvaluator, Type[BaseEvaluator], None]
     ) -> BaseEvaluator:
         if evaluator is None:
-
-            if
-
-
-
-
+            # Auto-select based on task_name if provided
+            if self._task_name:
+                registry = BaseEvaluator.list_registered()
+                for name, cls in registry.items():
+                    task_names = getattr(cls, 'task_names', ())
+                    if self._task_name in task_names:
+                        logger.info(f"Auto-selected evaluator '{name}' for task '{self._task_name}'")
+                        return cls()
+                # NO FALLBACK - raise error if no evaluator found for task
+                raise EvaluatorError(
+                    f"No evaluator found for task '{self._task_name}'. "
+                    f"Available evaluators: {list(registry.keys())}. "
+                    f"Please specify an evaluator explicitly or add task_names to an evaluator."
+                )
+
+            # NO FALLBACK - if no task_name and no evaluator, require explicit selection
+            raise EvaluatorError(
+                "No evaluator specified and no task_name provided. "
+                "Either provide an evaluator name or a task_name for auto-selection."
+            )
         if isinstance(evaluator, BaseEvaluator):
            return evaluator
         if inspect.isclass(evaluator) and issubclass(evaluator, BaseEvaluator):
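A rough sketch of what the new auto-selection path implies for callers. The keyword names follow the attributes set in __init__ above; the full EvaluatorRotator constructor signature is not visible in this hunk, so the call below is an assumption.

# Assumed constructor keywords; only the task_name-driven resolution is shown in the hunk.
rotator = EvaluatorRotator(evaluator=None, task_name="boolq")
# _resolve_evaluator now scans BaseEvaluator.list_registered() for a class whose
# task_names tuple contains "boolq" (e.g. the new LogLikelihoodsEvaluator) and
# instantiates it; with neither an evaluator nor a task_name it raises
# EvaluatorError instead of silently falling back.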
wisent/core/main.py
CHANGED
@@ -8,7 +8,7 @@ and provides the main() function that serves as the CLI entry point.
 import sys
 from wisent.core.parser_arguments import setup_parser
 from wisent.core.branding import print_banner
-from wisent.core.cli import execute_tasks, execute_generate_pairs_from_task, execute_generate_pairs, execute_get_activations, execute_create_steering_vector, execute_generate_vector_from_task, execute_generate_vector_from_synthetic, execute_optimize_classification, execute_optimize_steering, execute_generate_responses, execute_evaluate_responses
+from wisent.core.cli import execute_tasks, execute_generate_pairs_from_task, execute_generate_pairs, execute_get_activations, execute_create_steering_vector, execute_generate_vector_from_task, execute_generate_vector_from_synthetic, execute_optimize_classification, execute_optimize_steering, execute_optimize_sample_size, execute_generate_responses, execute_evaluate_responses, execute_multi_steer
 
 
 def main():
@@ -44,10 +44,14 @@ def main():
         execute_optimize_classification(args)
     elif args.command == 'optimize-steering':
         execute_optimize_steering(args)
+    elif args.command == 'optimize-sample-size':
+        execute_optimize_sample_size(args)
     elif args.command == 'generate-responses':
         execute_generate_responses(args)
     elif args.command == 'evaluate-responses':
         execute_evaluate_responses(args)
+    elif args.command == 'multi-steer':
+        execute_multi_steer(args)
     else:
         print(f"\n✗ Command '{args.command}' is not yet implemented")
         sys.exit(1)
wisent/core/model_persistence.py
CHANGED
@@ -33,16 +33,8 @@ class ModelPersistence:
         if save_dir:
             os.makedirs(save_dir, exist_ok=True)
 
-        #
-
-        filename = os.path.basename(save_path)
-        # Sanitize filename to handle periods in model names
-        safe_filename = filename.replace('.', '_')
-        safe_path = os.path.join(directory, safe_filename)
-
-        # Add layer suffix to filename
-        base, ext = os.path.splitext(safe_path)
-        classifier_path = f"{base}_layer_{layer}{ext or '.pkl'}"
+        # Use the exact path provided by the user
+        classifier_path = save_path
 
         # Prepare data to save
         save_data = {
@@ -69,15 +61,8 @@ class ModelPersistence:
         Returns:
             Tuple of (classifier, metadata)
         """
-        #
-
-        filename = os.path.basename(load_path)
-        safe_filename = filename.replace('.', '_')
-        safe_path = os.path.join(directory, safe_filename)
-
-        # Add layer suffix to filename
-        base, ext = os.path.splitext(safe_path)
-        classifier_path = f"{base}_layer_{layer}{ext or '.pkl'}"
+        # Use the exact path provided by the user
+        classifier_path = load_path
 
         if not os.path.exists(classifier_path):
             raise FileNotFoundError(f"Classifier file not found: {classifier_path}")
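To make the behavioural change concrete, here is the filename mangling the removed branch used to perform versus what happens now; the example path and layer value are made up, the transformations are copied from the removed lines.

import os

save_path = "checkpoints/my.classifier.pkl"   # hypothetical user-supplied path
layer = 15

# 0.5.13 behaviour (removed above): sanitise periods, then append a layer suffix
safe = os.path.basename(save_path).replace('.', '_')   # "my_classifier_pkl"
base, ext = os.path.splitext(safe)
old_style = f"{base}_layer_{layer}{ext or '.pkl'}"      # "my_classifier_pkl_layer_15.pkl"

# 0.5.15 behaviour: the path is used verbatim for both save and load
new_style = save_path                                   # "checkpoints/my.classifier.pkl"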
wisent/core/models/wisent_model.py
CHANGED
@@ -284,9 +284,17 @@ class WisentModel:
             {"input_ids": tensor([[...]]), "attention_mask": tensor([[...]])}
         """
 
-
-
-
+        try:
+            ids = self.tokenizer.apply_chat_template(
+                message, tokenize=True, add_generation_prompt=add_generation_prompt, enable_thinking=enable_thinking, return_tensors="pt"
+            )[0]
+        except ValueError as e:
+            if "chat_template is not set" in str(e):
+                # Fallback for models without chat templates: concatenate messages
+                text = " ".join([msg.get("content", "") for msg in message if isinstance(msg, dict)])
+                ids = self.tokenizer.encode(text, return_tensors="pt")[0]
+            else:
+                raise
         return {
             "input_ids": ids,
             "attention_mask": torch.ones_like(ids),
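The fallback branch simply flattens the chat messages into plain text before encoding; a small sketch of what that produces (the message list below is illustrative, the join expression mirrors the added line):

message = [
    {"role": "system", "content": "You are concise."},
    {"role": "user", "content": "Name one prime number."},
]
text = " ".join([msg.get("content", "") for msg in message if isinstance(msg, dict)])
# text == "You are concise. Name one prime number."
# This string is what gets passed to tokenizer.encode(...) in the except branch.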
wisent/core/parser.py
CHANGED
@@ -1590,14 +1590,15 @@ def setup_multi_steer_parser(parser):
         "--vector",
         type=str,
         action="append",
-        required=
+        required=False,
+        default=None,
         metavar="PATH:WEIGHT",
-        help="Path to steering vector and its weight (format: path/to/vector.pt:0.5). Can be specified multiple times.",
+        help="Path to steering vector and its weight (format: path/to/vector.pt:0.5). Can be specified multiple times. If omitted, generates unsteered baseline.",
     )
 
     # Model configuration
     parser.add_argument("--model", type=str, required=True, help="Model name or path")
-    parser.add_argument("--layer", type=int, required=
+    parser.add_argument("--layer", type=int, required=False, default=None, help="Layer index to apply combined steering (required when using vectors)")
     parser.add_argument("--device", type=str, default=None, help="Device to run on (default: auto-detect)")
 
     # Steering method configuration
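Putting the relaxed arguments together, a hypothetical invocation sketch driving the multi-steer command through the package's own main() entry point. Only the flags defined in this hunk are used; the model name and vector paths are placeholders, and the command may require additional arguments not visible here.

import sys
from wisent.core.main import main

# Combined steering from two trained vectors at a chosen layer (paths/weights assumed).
sys.argv = [
    "wisent", "multi-steer",
    "--model", "meta-llama/Llama-3.1-8B-Instruct",   # placeholder model id
    "--layer", "15",
    "--vector", "vectors/helpful.pt:0.8",
    "--vector", "vectors/formal.pt:0.3",
]
main()

# Omitting every --vector (and --layer) is now accepted and produces an
# unsteered baseline instead of an argparse "required argument" error.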
wisent/core/parser_arguments/main_parser.py
CHANGED
@@ -15,7 +15,7 @@ from wisent.core.parser_arguments.create_steering_vector_parser import setup_cre
 from wisent.core.parser_arguments.generate_vector_from_task_parser import setup_generate_vector_from_task_parser
 from wisent.core.parser_arguments.generate_vector_from_synthetic_parser import setup_generate_vector_from_synthetic_parser
 from wisent.core.parser_arguments.synthetic_parser import setup_synthetic_parser
-from wisent.core.parser_arguments.
+from wisent.core.parser_arguments.nonsense_parser import setup_test_nonsense_parser
 from wisent.core.parser_arguments.monitor_parser import setup_monitor_parser
 from wisent.core.parser_arguments.agent_parser import setup_agent_parser
 from wisent.core.parser_arguments.model_config_parser import setup_model_config_parser