wisent-0.5.12-py3-none-any.whl → wisent-0.5.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of wisent might be problematic.

Files changed (225)
  1. wisent/__init__.py +1 -1
  2. wisent/core/activations/__init__.py +26 -0
  3. wisent/core/activations/activations.py +96 -0
  4. wisent/core/activations/activations_collector.py +71 -20
  5. wisent/core/activations/prompt_construction_strategy.py +47 -0
  6. wisent/core/agent/budget.py +2 -2
  7. wisent/core/agent/device_benchmarks.py +1 -1
  8. wisent/core/agent/diagnose/classifier_marketplace.py +8 -8
  9. wisent/core/agent/diagnose/response_diagnostics.py +4 -4
  10. wisent/core/agent/diagnose/synthetic_classifier_option.py +1 -1
  11. wisent/core/agent/diagnose/tasks/task_manager.py +3 -3
  12. wisent/core/agent/diagnose.py +2 -1
  13. wisent/core/autonomous_agent.py +10 -2
  14. wisent/core/benchmark_extractors.py +293 -0
  15. wisent/core/bigcode_integration.py +20 -7
  16. wisent/core/branding.py +108 -0
  17. wisent/core/cli/__init__.py +15 -0
  18. wisent/core/cli/create_steering_vector.py +138 -0
  19. wisent/core/cli/evaluate_responses.py +715 -0
  20. wisent/core/cli/generate_pairs.py +128 -0
  21. wisent/core/cli/generate_pairs_from_task.py +119 -0
  22. wisent/core/cli/generate_responses.py +129 -0
  23. wisent/core/cli/generate_vector_from_synthetic.py +149 -0
  24. wisent/core/cli/generate_vector_from_task.py +147 -0
  25. wisent/core/cli/get_activations.py +191 -0
  26. wisent/core/cli/optimize_classification.py +339 -0
  27. wisent/core/cli/optimize_steering.py +364 -0
  28. wisent/core/cli/tasks.py +182 -0
  29. wisent/core/cli_logger.py +22 -0
  30. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +27 -1
  31. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +49 -1
  32. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +115 -0
  33. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +115 -0
  34. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +111 -0
  35. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +119 -0
  36. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +115 -0
  37. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +114 -0
  38. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +118 -0
  39. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +146 -0
  40. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +129 -0
  41. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +119 -0
  42. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +112 -0
  43. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +113 -0
  44. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livecodebench.py +367 -0
  45. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +115 -0
  46. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +114 -0
  47. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +113 -0
  48. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +112 -0
  49. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +111 -0
  50. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +114 -0
  51. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +113 -0
  52. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +115 -0
  53. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +111 -0
  54. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +113 -0
  55. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +113 -0
  56. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +112 -0
  57. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +116 -0
  58. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +115 -0
  59. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +111 -0
  60. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +111 -0
  61. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +121 -0
  62. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +121 -0
  63. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +111 -0
  64. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +110 -0
  65. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +114 -0
  66. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad2.py +124 -0
  67. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sst2.py +111 -0
  68. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +112 -0
  69. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +127 -0
  70. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_gen.py +112 -0
  71. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +117 -0
  72. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +117 -0
  73. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +127 -0
  74. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +119 -0
  75. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +1 -1
  76. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +111 -0
  77. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +114 -0
  78. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +112 -0
  79. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +114 -0
  80. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +114 -0
  81. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +1 -1
  82. wisent/core/data_loaders/__init__.py +235 -0
  83. wisent/core/data_loaders/loaders/lm_loader.py +2 -2
  84. wisent/core/data_loaders/loaders/task_interface_loader.py +300 -0
  85. wisent/{cli/data_loaders/data_loader_rotator.py → core/data_loaders/rotator.py} +1 -1
  86. wisent/core/download_full_benchmarks.py +79 -2
  87. wisent/core/evaluators/benchmark_specific/__init__.py +26 -0
  88. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/evaluator.py +17 -17
  89. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/cpp_sanitizer.py +2 -2
  90. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/java_sanitizer.py +2 -2
  91. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/python_sanitizer.py +2 -2
  92. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/__init__.py +3 -0
  93. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/provider.py +305 -0
  94. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/runtime.py +36 -4
  95. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/entrypoint.py +2 -4
  96. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/recipes.py +1 -1
  97. wisent/core/evaluators/benchmark_specific/coding/solution_generator.py +258 -0
  98. wisent/core/evaluators/benchmark_specific/exact_match_evaluator.py +79 -0
  99. wisent/core/evaluators/benchmark_specific/f1_evaluator.py +101 -0
  100. wisent/core/evaluators/benchmark_specific/generation_evaluator.py +197 -0
  101. wisent/core/{log_likelihoods_evaluator.py → evaluators/benchmark_specific/log_likelihoods_evaluator.py} +10 -2
  102. wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py +140 -0
  103. wisent/core/evaluators/benchmark_specific/personalization_evaluator.py +250 -0
  104. wisent/{cli/evaluators/evaluator_rotator.py → core/evaluators/rotator.py} +4 -4
  105. wisent/core/lm_eval_harness_ground_truth.py +3 -2
  106. wisent/core/main.py +57 -0
  107. wisent/core/model_persistence.py +2 -2
  108. wisent/core/models/wisent_model.py +6 -6
  109. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
  110. wisent/core/optuna/steering/steering_optimization.py +1 -1
  111. wisent/core/parser_arguments/__init__.py +10 -0
  112. wisent/core/parser_arguments/agent_parser.py +110 -0
  113. wisent/core/parser_arguments/configure_model_parser.py +7 -0
  114. wisent/core/parser_arguments/create_steering_vector_parser.py +59 -0
  115. wisent/core/parser_arguments/evaluate_parser.py +40 -0
  116. wisent/core/parser_arguments/evaluate_responses_parser.py +10 -0
  117. wisent/core/parser_arguments/full_optimize_parser.py +115 -0
  118. wisent/core/parser_arguments/generate_pairs_from_task_parser.py +33 -0
  119. wisent/core/parser_arguments/generate_pairs_parser.py +29 -0
  120. wisent/core/parser_arguments/generate_responses_parser.py +15 -0
  121. wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +127 -0
  122. wisent/core/parser_arguments/generate_vector_from_task_parser.py +127 -0
  123. wisent/core/parser_arguments/generate_vector_parser.py +90 -0
  124. wisent/core/parser_arguments/get_activations_parser.py +90 -0
  125. wisent/core/parser_arguments/main_parser.py +152 -0
  126. wisent/core/parser_arguments/model_config_parser.py +59 -0
  127. wisent/core/parser_arguments/monitor_parser.py +17 -0
  128. wisent/core/parser_arguments/multi_steer_parser.py +47 -0
  129. wisent/core/parser_arguments/optimize_classification_parser.py +67 -0
  130. wisent/core/parser_arguments/optimize_sample_size_parser.py +58 -0
  131. wisent/core/parser_arguments/optimize_steering_parser.py +147 -0
  132. wisent/core/parser_arguments/synthetic_parser.py +93 -0
  133. wisent/core/parser_arguments/tasks_parser.py +584 -0
  134. wisent/core/parser_arguments/test_nonsense_parser.py +26 -0
  135. wisent/core/parser_arguments/utils.py +111 -0
  136. wisent/core/prompts/core/prompt_formater.py +3 -3
  137. wisent/core/prompts/prompt_stratiegies/direct_completion.py +2 -0
  138. wisent/core/prompts/prompt_stratiegies/instruction_following.py +2 -0
  139. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +2 -0
  140. wisent/core/prompts/prompt_stratiegies/role_playing.py +2 -0
  141. wisent/{cli/steering_methods/steering_rotator.py → core/steering_methods/rotator.py} +4 -4
  142. wisent/core/steering_optimizer.py +45 -21
  143. wisent/{synthetic → core/synthetic}/cleaners/deduper_cleaner.py +3 -3
  144. wisent/{synthetic → core/synthetic}/cleaners/methods/base_dedupers.py +2 -2
  145. wisent/{synthetic → core/synthetic}/cleaners/methods/base_refusalers.py +1 -1
  146. wisent/{synthetic → core/synthetic}/cleaners/pairs_cleaner.py +5 -5
  147. wisent/{synthetic → core/synthetic}/cleaners/refusaler_cleaner.py +4 -4
  148. wisent/{synthetic → core/synthetic}/db_instructions/mini_dp.py +1 -1
  149. wisent/{synthetic → core/synthetic}/generators/diversities/methods/fast_diversity.py +1 -1
  150. wisent/{synthetic → core/synthetic}/generators/pairs_generator.py +38 -12
  151. wisent/core/tasks/livecodebench_task.py +4 -103
  152. wisent/core/timing_calibration.py +1 -1
  153. {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/METADATA +3 -3
  154. wisent-0.5.13.dist-info/RECORD +294 -0
  155. wisent-0.5.13.dist-info/entry_points.txt +2 -0
  156. wisent/benchmarks/coding/providers/livecodebench/provider.py +0 -53
  157. wisent/classifiers/core/atoms.py +0 -747
  158. wisent/classifiers/models/logistic.py +0 -29
  159. wisent/classifiers/models/mlp.py +0 -47
  160. wisent/cli/classifiers/classifier_rotator.py +0 -137
  161. wisent/cli/cli_logger.py +0 -142
  162. wisent/cli/wisent_cli/commands/help_cmd.py +0 -52
  163. wisent/cli/wisent_cli/commands/listing.py +0 -154
  164. wisent/cli/wisent_cli/commands/train_cmd.py +0 -322
  165. wisent/cli/wisent_cli/main.py +0 -93
  166. wisent/cli/wisent_cli/shell.py +0 -80
  167. wisent/cli/wisent_cli/ui.py +0 -69
  168. wisent/cli/wisent_cli/util/aggregations.py +0 -43
  169. wisent/cli/wisent_cli/util/parsing.py +0 -126
  170. wisent/cli/wisent_cli/version.py +0 -4
  171. wisent/opti/methods/__init__.py +0 -0
  172. wisent/synthetic/__init__.py +0 -0
  173. wisent/synthetic/cleaners/__init__.py +0 -0
  174. wisent/synthetic/cleaners/core/__init__.py +0 -0
  175. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  176. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  177. wisent/synthetic/db_instructions/__init__.py +0 -0
  178. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  179. wisent/synthetic/generators/__init__.py +0 -0
  180. wisent/synthetic/generators/core/__init__.py +0 -0
  181. wisent/synthetic/generators/diversities/__init__.py +0 -0
  182. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  183. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  184. wisent-0.5.12.dist-info/RECORD +0 -220
  185. /wisent/{benchmarks → core/evaluators/benchmark_specific/coding}/__init__.py +0 -0
  186. /wisent/{benchmarks/coding → core/evaluators/benchmark_specific/coding/metrics}/__init__.py +0 -0
  187. /wisent/{benchmarks/coding/metrics → core/evaluators/benchmark_specific/coding/metrics/core}/__init__.py +0 -0
  188. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/core/atoms.py +0 -0
  189. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/passk.py +0 -0
  190. /wisent/{benchmarks/coding/metrics/core → core/evaluators/benchmark_specific/coding/output_sanitizer}/__init__.py +0 -0
  191. /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/output_sanitizer/core}/__init__.py +0 -0
  192. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/core/atoms.py +0 -0
  193. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/utils.py +0 -0
  194. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/__init__.py +0 -0
  195. /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/providers}/core/__init__.py +0 -0
  196. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/core/atoms.py +0 -0
  197. /wisent/{benchmarks/coding/providers/core → core/evaluators/benchmark_specific/coding/safe_docker}/__init__.py +0 -0
  198. /wisent/{benchmarks/coding/providers/livecodebench → core/evaluators/benchmark_specific/coding/safe_docker/core}/__init__.py +0 -0
  199. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/atoms.py +0 -0
  200. /wisent/{benchmarks/coding/safe_docker → core/opti}/__init__.py +0 -0
  201. /wisent/{benchmarks/coding/safe_docker → core/opti}/core/__init__.py +0 -0
  202. /wisent/{opti → core/opti}/core/atoms.py +0 -0
  203. /wisent/{classifiers → core/opti/methods}/__init__.py +0 -0
  204. /wisent/{opti → core/opti}/methods/opti_classificator.py +0 -0
  205. /wisent/{opti → core/opti}/methods/opti_steering.py +0 -0
  206. /wisent/{classifiers/core → core/synthetic}/__init__.py +0 -0
  207. /wisent/{classifiers/models → core/synthetic/cleaners}/__init__.py +0 -0
  208. /wisent/{cli → core/synthetic/cleaners/core}/__init__.py +0 -0
  209. /wisent/{synthetic → core/synthetic}/cleaners/core/atoms.py +0 -0
  210. /wisent/{cli/classifiers → core/synthetic/cleaners/methods}/__init__.py +0 -0
  211. /wisent/{cli/data_loaders → core/synthetic/cleaners/methods/core}/__init__.py +0 -0
  212. /wisent/{synthetic → core/synthetic}/cleaners/methods/core/atoms.py +0 -0
  213. /wisent/{cli/evaluators → core/synthetic/db_instructions}/__init__.py +0 -0
  214. /wisent/{cli/steering_methods → core/synthetic/db_instructions/core}/__init__.py +0 -0
  215. /wisent/{synthetic → core/synthetic}/db_instructions/core/atoms.py +0 -0
  216. /wisent/{cli/wisent_cli → core/synthetic/generators}/__init__.py +0 -0
  217. /wisent/{cli/wisent_cli/commands → core/synthetic/generators/core}/__init__.py +0 -0
  218. /wisent/{synthetic → core/synthetic}/generators/core/atoms.py +0 -0
  219. /wisent/{cli/wisent_cli/util → core/synthetic/generators/diversities}/__init__.py +0 -0
  220. /wisent/{opti → core/synthetic/generators/diversities/core}/__init__.py +0 -0
  221. /wisent/{synthetic → core/synthetic}/generators/diversities/core/core.py +0 -0
  222. /wisent/{opti/core → core/synthetic/generators/diversities/methods}/__init__.py +0 -0
  223. {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/WHEEL +0 -0
  224. {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/licenses/LICENSE +0 -0
  225. {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/top_level.txt +0 -0
wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py ADDED
@@ -0,0 +1,140 @@
+ """Perplexity evaluator for language modeling benchmarks.
+
+ Used for tasks that measure language modeling performance.
+ """
+
+ from typing import Any
+ import logging
+ import math
+ import torch
+
+ from wisent.core.evaluators.core.atoms import BaseEvaluator, EvalResult
+
+ logger = logging.getLogger(__name__)
+
+
+ class PerplexityEvaluator(BaseEvaluator):
+     """Evaluator using perplexity for language modeling tasks.
+
+     Compatible with:
+     - WikiText: Language modeling
+     - LAMBADA: Word prediction in context
+     - Any loglikelihood_rolling task
+     """
+
+     name = "perplexity"
+     description = "Perplexity evaluator for language modeling"
+     task_names = ("wikitext", "lambada_openai", "lambada_standard")
+
+     def __init__(self, model=None):
+         """Initialize perplexity evaluator.
+
+         Args:
+             model: Model with loglikelihood capabilities
+         """
+         super().__init__()
+         self.model = model
+
+     def evaluate(self, response: str, expected: Any, **kwargs) -> EvalResult:
+         """Evaluate using perplexity.
+
+         Args:
+             response: Text to evaluate (for language modeling)
+             expected: NOT USED (perplexity is computed on response)
+             **kwargs:
+                 model: Model instance (WisentModel or similar, overrides self.model)
+                 context: Optional context for conditional generation
+
+         Returns:
+             EvalResult with perplexity as confidence metric (lower is better)
+         """
+         model = kwargs.get('model', self.model)
+         context = kwargs.get('context', '')
+
+         if model is None:
+             raise ValueError(
+                 "No model provided for perplexity computation. "
+                 "Please provide a model via __init__ or as a kwarg."
+             )
+
+         try:
+             # Compute perplexity
+             full_text = f"{context}{response}" if context else response
+             perplexity = self._compute_perplexity(model, full_text)
+
+             # Lower perplexity is better, so we use negative for confidence
+             # (higher confidence = lower perplexity)
+             confidence = -perplexity
+
+             return EvalResult(
+                 ground_truth="EVALUATED",
+                 method_used=self.name,
+                 confidence=confidence,
+                 details=f"Perplexity: {perplexity:.4f} (lower is better)",
+             )
+
+         except Exception as e:
+             logger.error(f"Error computing perplexity: {e}")
+             return EvalResult(
+                 ground_truth="ERROR",
+                 method_used=self.name,
+                 confidence=0.0,
+                 details=f"Perplexity computation failed: {str(e)}",
+             )
+
+     def _compute_perplexity(self, model, text: str) -> float:
+         """Compute perplexity for text.
+
+         Args:
+             model: Model with HuggingFace interface (WisentModel or similar)
+             text: Text to evaluate
+
+         Returns:
+             Perplexity value (lower is better)
+         """
+         # Get model and tokenizer from WisentModel
+         if hasattr(model, 'hf_model') and hasattr(model, 'tokenizer'):
+             hf_model = model.hf_model
+             tokenizer = model.tokenizer
+         else:
+             # Assume model is directly a HuggingFace model
+             hf_model = model
+             tokenizer = getattr(model, 'tokenizer', None)
+             if tokenizer is None:
+                 raise ValueError("Model must have a tokenizer attribute")
+
+         # Tokenize the text
+         encodings = tokenizer(text, return_tensors='pt')
+         input_ids = encodings['input_ids'].to(hf_model.device)
+
+         # Get model outputs (logits)
+         with torch.no_grad():
+             outputs = hf_model(input_ids)
+             logits = outputs.logits
+
+         # Shift logits and labels for next-token prediction
+         # logits: [batch, seq_len, vocab_size]
+         # We want to predict tokens 1..N from tokens 0..N-1
+         shift_logits = logits[:, :-1, :].contiguous()
+         shift_labels = input_ids[:, 1:].contiguous()
+
+         # Compute log probabilities
+         log_probs = torch.nn.functional.log_softmax(shift_logits, dim=-1)
+
+         # Gather the log probabilities of the actual tokens
+         # shift_labels: [batch, seq_len-1]
+         # We need to gather from log_probs: [batch, seq_len-1, vocab_size]
+         batch_size, seq_len = shift_labels.shape
+         token_log_probs = log_probs.gather(
+             dim=-1,
+             index=shift_labels.unsqueeze(-1)
+         ).squeeze(-1)
+
+         # Compute negative log-likelihood (NLL)
+         nll = -token_log_probs.sum()
+
+         # Compute perplexity = exp(NLL / num_tokens)
+         num_tokens = seq_len
+         perplexity = torch.exp(nll / num_tokens)
+
+         return float(perplexity)
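The new evaluator scores a text by corpus perplexity, i.e. exp of the mean per-token negative log-likelihood under the model, and returns the negative perplexity in the confidence field. A minimal usage sketch follows; it is not shipped in the package, and the tiny model name is only an illustration of any Hugging Face causal LM with a tokenizer attached:

    from transformers import AutoModelForCausalLM, AutoTokenizer
    from wisent.core.evaluators.benchmark_specific.perplexity_evaluator import PerplexityEvaluator

    # Any causal LM works; the evaluator accepts either a WisentModel-style wrapper
    # (with .hf_model / .tokenizer) or a raw HF model carrying a .tokenizer attribute.
    model_name = "sshleifer/tiny-gpt2"  # illustrative choice, not required by wisent
    hf_model = AutoModelForCausalLM.from_pretrained(model_name)
    hf_model.tokenizer = AutoTokenizer.from_pretrained(model_name)

    evaluator = PerplexityEvaluator(model=hf_model)
    result = evaluator.evaluate(
        response="The quick brown fox jumps over the lazy dog.",
        expected=None,  # ignored; perplexity is computed on the response text
    )

    print(result.details)     # "Perplexity: ... (lower is better)"
    print(result.confidence)  # negative perplexity, so values closer to zero are better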
wisent/core/evaluators/benchmark_specific/personalization_evaluator.py ADDED
@@ -0,0 +1,250 @@
+ """Personalization evaluator for assessing personality trait manifestation in model responses.
+
+ Evaluates how well model-generated responses exhibit specified personality traits
+ by using an LLM judge to assess trait alignment and intensity.
+ """
+
+ from typing import Any, Optional
+ import logging
+ import re
+
+ from wisent.core.evaluators.core.atoms import BaseEvaluator, EvalResult
+
+ logger = logging.getLogger(__name__)
+
+
+ class PersonalizationEvaluator(BaseEvaluator):
+     """Evaluator for personality trait manifestation in responses.
+
+     Uses an LLM judge to evaluate how well responses exhibit target personality traits.
+     Compatible with personalization benchmarks that require trait assessment.
+     """
+
+     name = "personalization"
+     description = "Evaluates personality trait manifestation in model responses"
+     task_names = ("personalization",)
+
+     # Default judge model - can be overridden
+     default_judge_model = "gpt-4"
+
+     # Evaluation prompt template
+     JUDGE_PROMPT_TEMPLATE = """You are an expert evaluator analyzing personality traits in text responses.
+
+ **Target Trait**: {trait}
+ **Trait Description**: {trait_description}
+
+ **Prompt Given to Model**: {prompt}
+
+ **Model Response to Evaluate**:
+ {response}
+
+ Please evaluate how well this response exhibits the target trait "{trait}".
+
+ Provide your evaluation in the following format:
+
+ TRAIT_SCORE: [0-10]
+ INTENSITY: [weak/moderate/strong]
+ EXPLANATION: [Brief explanation of your rating]
+
+ Where TRAIT_SCORE represents:
+ - 0-2: Trait not present or contradicts trait
+ - 3-4: Minimal trait manifestation
+ - 5-6: Moderate trait manifestation
+ - 7-8: Strong trait manifestation
+ - 9-10: Exemplary trait manifestation
+
+ Evaluate ONLY based on how well the response demonstrates the target trait, not overall quality."""
+
+     def __init__(self, judge_model: Optional[str] = None):
+         """Initialize personalization evaluator.
+
+         Args:
+             judge_model: Model to use as judge (default: gpt-4)
+         """
+         self.judge_model = judge_model or self.default_judge_model
+
+     def evaluate(self, response: str, expected: Any, **kwargs) -> EvalResult:
+         """Evaluate personality trait manifestation in response.
+
+         Args:
+             response: Generated model response to evaluate
+             expected: Expected trait information (dict with 'trait', 'trait_description')
+             **kwargs:
+                 prompt: Original prompt given to model
+                 judge_model: Override default judge model
+                 use_mock: Use mock evaluation for testing (default: False)
+
+         Returns:
+             EvalResult with trait_score as confidence, details containing analysis
+         """
+         # Extract trait information
+         if isinstance(expected, dict):
+             trait = expected.get('trait', 'unknown')
+             trait_description = expected.get('trait_description', '')
+         else:
+             trait = str(expected)
+             trait_description = f"The trait: {trait}"
+
+         prompt = kwargs.get('prompt', '')
+         judge_model = kwargs.get('judge_model', self.judge_model)
+         use_mock = kwargs.get('use_mock', False)
+
+         # Mock evaluation for testing without API calls
+         if use_mock:
+             return self._mock_evaluate(response, trait, prompt)
+
+         # Prepare judge prompt
+         judge_prompt = self.JUDGE_PROMPT_TEMPLATE.format(
+             trait=trait,
+             trait_description=trait_description,
+             prompt=prompt,
+             response=response
+         )
+
+         try:
+             # Call LLM judge
+             judge_response = self._call_judge(judge_prompt, judge_model)
+
+             # Parse judge response
+             trait_score, intensity, explanation = self._parse_judge_response(judge_response)
+
+             # Normalize score to 0-1 confidence
+             confidence = trait_score / 10.0
+
+             # Determine ground truth based on score
+             if trait_score >= 7:
+                 ground_truth = "TRUTHFUL"  # Strong trait manifestation
+             elif trait_score >= 4:
+                 ground_truth = "UNKNOWN"  # Moderate trait manifestation
+             else:
+                 ground_truth = "UNTRUTHFUL"  # Weak/absent trait manifestation
+
+             return EvalResult(
+                 ground_truth=ground_truth,
+                 method_used=self.name,
+                 confidence=confidence,
+                 details=f"Trait: {trait} | Score: {trait_score}/10 | Intensity: {intensity} | {explanation}",
+                 meta={
+                     "trait": trait,
+                     "trait_score": trait_score,
+                     "intensity": intensity,
+                     "judge_model": judge_model,
+                     "explanation": explanation
+                 }
+             )
+
+         except Exception as e:
+             logger.exception(f"Error in personalization evaluation: {e}")
+             return EvalResult(
+                 ground_truth="UNKNOWN",
+                 method_used=self.name,
+                 confidence=0.0,
+                 details=f"Evaluation error: {str(e)}",
+                 meta={"trait": trait, "error": str(e)}
+             )
+
+     def _mock_evaluate(self, response: str, trait: str, prompt: str) -> EvalResult:
+         """Mock evaluation for testing without API calls.
+
+         Args:
+             response: Model response
+             trait: Target trait
+             prompt: Original prompt
+
+         Returns:
+             EvalResult with mock scores
+         """
+         # Simple heuristic: check if trait appears in response
+         response_lower = response.lower()
+         trait_lower = trait.lower()
+
+         # Calculate mock score based on simple heuristics
+         score = 5  # baseline
+
+         if trait_lower in response_lower:
+             score += 2
+
+         if len(response) > 100:
+             score += 1
+
+         if len(response.split('.')) > 2:
+             score += 1
+
+         score = min(score, 10)
+
+         intensity = "weak" if score < 5 else ("moderate" if score < 7 else "strong")
+
+         confidence = score / 10.0
+         ground_truth = "TRUTHFUL" if score >= 7 else ("UNKNOWN" if score >= 4 else "UNTRUTHFUL")
+
+         return EvalResult(
+             ground_truth=ground_truth,
+             method_used=f"{self.name}_mock",
+             confidence=confidence,
+             details=f"Mock evaluation | Trait: {trait} | Score: {score}/10 | Intensity: {intensity}",
+             meta={
+                 "trait": trait,
+                 "trait_score": score,
+                 "intensity": intensity,
+                 "judge_model": "mock",
+                 "explanation": "Mock evaluation based on simple heuristics"
+             }
+         )
+
+     def _call_judge(self, prompt: str, model: str) -> str:
+         """Call LLM judge to evaluate response.
+
+         Args:
+             prompt: Judge prompt
+             model: Model identifier
+
+         Returns:
+             Judge response text
+         """
+         # This would call OpenAI API or other LLM API
+         # For now, raising NotImplementedError to indicate it needs implementation
+         try:
+             import openai
+
+             response = openai.ChatCompletion.create(
+                 model=model,
+                 messages=[
+                     {"role": "system", "content": "You are an expert evaluator of personality traits in text."},
+                     {"role": "user", "content": prompt}
+                 ],
+                 temperature=0.0,
+                 max_tokens=500
+             )
+
+             return response.choices[0].message.content
+
+         except ImportError:
+             raise NotImplementedError(
+                 "OpenAI package not installed. Install with: pip install openai\n"
+                 "Or use use_mock=True for testing without API calls."
+             )
+         except Exception as e:
+             raise RuntimeError(f"Error calling judge model: {e}")
+
+     def _parse_judge_response(self, judge_response: str) -> tuple[float, str, str]:
+         """Parse judge response to extract score, intensity, and explanation.
+
+         Args:
+             judge_response: Raw judge response text
+
+         Returns:
+             Tuple of (trait_score, intensity, explanation)
+         """
+         # Extract TRAIT_SCORE
+         score_match = re.search(r'TRAIT_SCORE:\s*(\d+(?:\.\d+)?)', judge_response, re.IGNORECASE)
+         trait_score = float(score_match.group(1)) if score_match else 5.0
+
+         # Extract INTENSITY
+         intensity_match = re.search(r'INTENSITY:\s*(weak|moderate|strong)', judge_response, re.IGNORECASE)
+         intensity = intensity_match.group(1).lower() if intensity_match else "moderate"
+
+         # Extract EXPLANATION
+         explanation_match = re.search(r'EXPLANATION:\s*(.+?)(?=\n\n|\Z)', judge_response, re.IGNORECASE | re.DOTALL)
+         explanation = explanation_match.group(1).strip() if explanation_match else "No explanation provided"
+
+         return trait_score, intensity, explanation
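The judge prompt asks for a fixed TRAIT_SCORE / INTENSITY / EXPLANATION block, which _parse_judge_response extracts with regexes, and use_mock=True bypasses the judge entirely. A minimal sketch of exercising the mock path (not shipped in the package; the trait and texts are made up for illustration):

    from wisent.core.evaluators.benchmark_specific.personalization_evaluator import PersonalizationEvaluator

    evaluator = PersonalizationEvaluator()  # judge defaults to "gpt-4" but is unused in mock mode

    result = evaluator.evaluate(
        response="I double-checked every figure twice before sending the report.",
        expected={
            "trait": "conscientiousness",
            "trait_description": "Careful, diligent, and detail-oriented.",
        },
        prompt="Describe how you prepared the quarterly report.",
        use_mock=True,  # heuristic scoring only; no judge API call is made
    )

    print(result.ground_truth)  # TRUTHFUL / UNKNOWN / UNTRUTHFUL depending on the mock score
    print(result.details)       # e.g. "Mock evaluation | Trait: conscientiousness | Score: ..."

Note that the non-mock path calls the legacy openai.ChatCompletion.create interface, which exists only in openai versions prior to 1.0, so the mock path is the safer way to smoke-test this evaluator.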
wisent/{cli/evaluators/evaluator_rotator.py → core/evaluators/rotator.py}
@@ -20,7 +20,7 @@ class EvaluatorRotator:
          self,
          evaluator: Union[str, BaseEvaluator, Type[BaseEvaluator], None] = None,
          task_name: Optional[str] = None,
-         evaluators_location: Union[str, Path] = "wisent_guard.core.evaluators.oracles",
+         evaluators_location: Union[str, Path] = "wisent.core.evaluators.oracles",
          autoload: bool = True,
      ) -> None:
          if autoload:
@@ -29,7 +29,7 @@ class EvaluatorRotator:
          self._task_name = task_name

      @staticmethod
-     def discover_evaluators(location: Union[str, Path] = "wisent_guard.core.evaluators.oracles") -> None:
+     def discover_evaluators(location: Union[str, Path] = "wisent.core.evaluators.oracles") -> None:
          """
          Import all evaluator modules so BaseEvaluator subclasses self-register.

@@ -130,10 +130,10 @@ class EvaluatorRotator:


  if __name__ == "__main__":
-     from evaluator_rotator import EvaluatorRotator
+     from wisent.core.evaluators.rotator import EvaluatorRotator

      rot = EvaluatorRotator(
-         evaluators_location="wisent_guard.core.evaluators.oracles",  # << no leading slash
+         evaluators_location="wisent.core.evaluators.oracles",  # << no leading slash
          autoload=True,
      )

wisent/core/lm_eval_harness_ground_truth.py
@@ -7,7 +7,8 @@ This module provides ground truth evaluation using the lm-eval-harness framework
  import logging
  from typing import Any, Dict

- from wisent.core.activations import ActivationAggregationStrategy, Activations
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
+ from wisent.core.activations.activations import Activations
  from wisent.core.layer import Layer

  logger = logging.getLogger(__name__)
@@ -636,7 +637,7 @@ class LMEvalHarnessGroundTruth:
          try:
              import json

-             eval_methods_path = "wisent_guard/parameters/benchmarks/benchmark_evaluation_methods.json"
+             eval_methods_path = "wisent/parameters/benchmarks/benchmark_evaluation_methods.json"
              with open(eval_methods_path) as f:
                  benchmark_methods = json.load(f)
              return benchmark_methods.get(task_name, "text-generation")
wisent/core/main.py ADDED
@@ -0,0 +1,57 @@
+ """
+ Main entry point for the Wisent CLI.
+
+ This module connects the argparse parser (wisent/core/parser_arguments/) to execution logic
+ and provides the main() function that serves as the CLI entry point.
+ """
+
+ import sys
+ from wisent.core.parser_arguments import setup_parser
+ from wisent.core.branding import print_banner
+ from wisent.core.cli import execute_tasks, execute_generate_pairs_from_task, execute_generate_pairs, execute_get_activations, execute_create_steering_vector, execute_generate_vector_from_task, execute_generate_vector_from_synthetic, execute_optimize_classification, execute_optimize_steering, execute_generate_responses, execute_evaluate_responses
+
+
+ def main():
+     """Main entry point for the Wisent CLI."""
+     # Show banner
+     print_banner("Wisent CLI", width=64, use_color=True)
+
+     # Parse arguments
+     parser = setup_parser()
+     args = parser.parse_args()
+
+     # If no command specified, show help
+     if not hasattr(args, 'command') or args.command is None:
+         parser.print_help()
+         sys.exit(0)
+
+     # Execute based on command
+     if args.command == 'tasks':
+         execute_tasks(args)
+     elif args.command == 'generate-pairs':
+         execute_generate_pairs(args)
+     elif args.command == 'generate-pairs-from-task':
+         execute_generate_pairs_from_task(args)
+     elif args.command == 'get-activations':
+         execute_get_activations(args)
+     elif args.command == 'create-steering-vector':
+         execute_create_steering_vector(args)
+     elif args.command == 'generate-vector-from-task':
+         execute_generate_vector_from_task(args)
+     elif args.command == 'generate-vector-from-synthetic':
+         execute_generate_vector_from_synthetic(args)
+     elif args.command == 'optimize-classification':
+         execute_optimize_classification(args)
+     elif args.command == 'optimize-steering':
+         execute_optimize_steering(args)
+     elif args.command == 'generate-responses':
+         execute_generate_responses(args)
+     elif args.command == 'evaluate-responses':
+         execute_evaluate_responses(args)
+     else:
+         print(f"\n✗ Command '{args.command}' is not yet implemented")
+         sys.exit(1)
+
+
+ if __name__ == '__main__':
+     main()
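The routing in main() is a straight if/elif chain over args.command. Purely as a reading aid (this is not code from the release), the same mapping can be written as a dispatch table using the handlers main.py already imports from wisent.core.cli:

    from wisent.core import cli

    # Command-to-handler map mirroring the if/elif chain in main() above.
    COMMAND_HANDLERS = {
        "tasks": cli.execute_tasks,
        "generate-pairs": cli.execute_generate_pairs,
        "generate-pairs-from-task": cli.execute_generate_pairs_from_task,
        "get-activations": cli.execute_get_activations,
        "create-steering-vector": cli.execute_create_steering_vector,
        "generate-vector-from-task": cli.execute_generate_vector_from_task,
        "generate-vector-from-synthetic": cli.execute_generate_vector_from_synthetic,
        "optimize-classification": cli.execute_optimize_classification,
        "optimize-steering": cli.execute_optimize_steering,
        "generate-responses": cli.execute_generate_responses,
        "evaluate-responses": cli.execute_evaluate_responses,
    }


    def dispatch(args) -> None:
        """Look up and run the handler for args.command, as main() does inline."""
        handler = COMMAND_HANDLERS.get(args.command)
        if handler is None:
            raise SystemExit(f"Command '{args.command}' is not yet implemented")
        handler(args)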
wisent/core/model_persistence.py
@@ -268,7 +268,7 @@ def create_classifier_metadata(
          'token_aggregation': token_aggregation,
          'detection_threshold': detection_threshold,
          'created_at': datetime.datetime.now().isoformat(),
-         'wisent_guard_version': '1.0.0'  # Could be dynamically determined
+         'wisent_version': '1.0.0'  # Could be dynamically determined
      }

      # Add any additional metadata
@@ -308,7 +308,7 @@ def create_steering_vector_metadata(
          'vector_strength': vector_strength,
          'training_samples': training_samples,
          'created_at': datetime.datetime.now().isoformat(),
-         'wisent_guard_version': '1.0.0'
+         'wisent_version': '1.0.0'
      }

      # Add any additional metadata
wisent/core/models/wisent_model.py
@@ -95,7 +95,7 @@ class WisentModel:
          elif self.device == "cuda":
              load_kwargs["dtype"] = torch.float16
              load_kwargs["device_map"] = "auto"
-             load_kwargs["attn_implementation"] = "flash_attention_2"  # Use flash attention for CUDA
+             load_kwargs["attn_implementation"] = "flash_attention_2"  # Uses flash-attn for 2-4x speedup
          else:
              load_kwargs["dtype"] = torch.float32
              load_kwargs["device_map"] = None
@@ -330,7 +330,7 @@ class WisentModel:

          batch = self.tokenizer.pad(singles, padding=True, return_tensors="pt")

-         batch = {k: v.to(resolve_torch_device()) for k, v in batch.items()}
+         batch = {k: v.to(self.device) for k, v in batch.items()}

          return batch

@@ -463,8 +463,8 @@ class WisentModel:
              )
              # Move tensors to the correct device (same as _batch_encode does)
              batch = {
-                 "input_ids": tokenizer_output["input_ids"].to(resolve_torch_device()),
-                 "attention_mask": tokenizer_output["attention_mask"].to(resolve_torch_device())
+                 "input_ids": tokenizer_output["input_ids"].to(self.device),
+                 "attention_mask": tokenizer_output["attention_mask"].to(self.device)
              }
          else:
              # Current behavior: apply chat template
@@ -695,8 +695,8 @@ class WisentModel:
              )
              # Move tensors to the correct device (same as _batch_encode does)
              batch = {
-                 "input_ids": tokenizer_output["input_ids"].to(resolve_torch_device()),
-                 "attention_mask": tokenizer_output["attention_mask"].to(resolve_torch_device())
+                 "input_ids": tokenizer_output["input_ids"].to(self.device),
+                 "attention_mask": tokenizer_output["attention_mask"].to(self.device)
              }
          else:
              # Current behavior: apply chat template
wisent/core/optuna/classifier/optuna_classifier_optimizer.py
@@ -28,12 +28,12 @@ def get_model_dtype(model) -> torch.dtype:
      Extract model's native dtype from parameters.

      Args:
-         model: PyTorch model or wisent_guard Model wrapper
+         model: PyTorch model or wisent Model wrapper

      Returns:
          The model's native dtype
      """
-     # Handle wisent_guard Model wrapper
+     # Handle wisent Model wrapper
      if hasattr(model, "hf_model"):
          model_params = model.hf_model.parameters()
      else:
wisent/core/optuna/steering/steering_optimization.py
@@ -989,7 +989,7 @@ class SteeringOptimizer:
          On subsequent calls: Return cached classifier from current session

          Args:
-             model: Language model (wisent_guard Model wrapper)
+             model: Language model (wisent Model wrapper)
              optimization_config: Primary configuration source
              model_name: Fallback model name if optimization_config not provided
              task_name: Fallback task name if optimization_config not provided
wisent/core/parser_arguments/__init__.py ADDED
@@ -0,0 +1,10 @@
+ """
+ Parser arguments package for Wisent CLI.
+
+ This package contains argument parser definitions for each CLI command.
+ Each command has its own parser file for better organization and maintainability.
+ """
+
+ from wisent.core.parser_arguments.main_parser import setup_parser
+
+ __all__ = ["setup_parser"]