PyPI - wisent - Versions diffs - 0.5.12__py3-none-any.whl → 0.5.14__py3-none-any.whl - Mend

wisent 0.5.12__py3-none-any.whl → 0.5.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wisent might be problematic. Click here for more details.

Files changed (227) hide show

wisent/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.5.12"
1	+ __version__ = "0.5.14"

wisent/core/activations/__init__.py CHANGED Viewed

@@ -0,0 +1,26 @@
+"""Activation collection and management."""
+from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
+from wisent.core.activations.core.atoms import (
+    ActivationAggregationStrategy,
+    LayerActivations,
+)
+__all__ = [
+    "ActivationCollector",
+    "Activations",
+    "PromptConstructionStrategy",
+    "ActivationAggregationStrategy",
+    "LayerActivations",
+]
+def __getattr__(name):
+    """Lazy import to avoid circular dependencies."""
+    if name == "ActivationCollector":
+        from wisent.core.activations.activations_collector import ActivationCollector
+        return ActivationCollector
+    if name == "Activations":
+        from wisent.core.activations.activations import Activations
+        return Activations
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

wisent/core/activations/activations.py ADDED Viewed

@@ -0,0 +1,96 @@
+"""Activation wrapper for classifier feature extraction."""
+from typing import Any
+import torch
+from wisent.core.activations.core.atoms import ActivationAggregationStrategy
+class Activations:
+    """Wrapper for activation tensors with aggregation strategy.
+    This class wraps activation tensors and provides methods to extract
+    features for classifier input based on the specified aggregation strategy.
+    """
+    def __init__(self, tensor: torch.Tensor, layer: Any, aggregation_strategy):
+        """Initialize Activations wrapper.
+        Args:
+            tensor: Activation tensor (typically shape [batch, seq_len, hidden_dim])
+            layer: Layer object containing layer metadata
+            aggregation_strategy: Strategy for aggregating tokens (string or ActivationAggregationStrategy enum)
+        """
+        self.tensor = tensor
+        self.layer = layer
+        # Convert string to enum if needed
+        if isinstance(aggregation_strategy, str):
+            # Map common string values to enum
+            strategy_map = {
+                "average": ActivationAggregationStrategy.MEAN_POOLING,
+                "mean": ActivationAggregationStrategy.MEAN_POOLING,
+                "final": ActivationAggregationStrategy.LAST_TOKEN,
+                "last": ActivationAggregationStrategy.LAST_TOKEN,
+                "first": ActivationAggregationStrategy.FIRST_TOKEN,
+                "max": ActivationAggregationStrategy.MAX_POOLING,
+                "mean_pooling": ActivationAggregationStrategy.MEAN_POOLING,
+                "last_token": ActivationAggregationStrategy.LAST_TOKEN,
+                "first_token": ActivationAggregationStrategy.FIRST_TOKEN,
+                "max_pooling": ActivationAggregationStrategy.MAX_POOLING,
+            }
+            self.aggregation_strategy = strategy_map.get(
+                aggregation_strategy.lower(),
+                ActivationAggregationStrategy.MEAN_POOLING
+            )
+        else:
+            self.aggregation_strategy = aggregation_strategy
+    def extract_features_for_classifier(self) -> torch.Tensor:
+        """Extract features from activations for classifier input.
+        Aggregates the activation tensor based on the specified strategy
+        to produce a single feature vector suitable for classifier input.
+        Returns:
+            torch.Tensor: Aggregated features (typically shape [hidden_dim])
+        """
+        if self.tensor is None:
+            raise ValueError("Cannot extract features from None tensor")
+        # Ensure tensor is 3D: [batch, seq_len, hidden_dim]
+        if len(self.tensor.shape) == 2:
+            # If [seq_len, hidden_dim], add batch dimension
+            tensor = self.tensor.unsqueeze(0)
+        else:
+            tensor = self.tensor
+        # Apply aggregation strategy
+        if self.aggregation_strategy == ActivationAggregationStrategy.MEAN_POOLING:
+            # Average over sequence length dimension
+            features = tensor.mean(dim=1).squeeze(0)
+        elif self.aggregation_strategy == ActivationAggregationStrategy.LAST_TOKEN:
+            # Take last token
+            features = tensor[:, -1, :].squeeze(0)
+        elif self.aggregation_strategy == ActivationAggregationStrategy.FIRST_TOKEN:
+            # Take first token
+            features = tensor[:, 0, :].squeeze(0)
+        elif self.aggregation_strategy == ActivationAggregationStrategy.MAX_POOLING:
+            # Max over sequence length dimension
+            features = tensor.max(dim=1)[0].squeeze(0)
+        else:
+            # Default to mean pooling
+            features = tensor.mean(dim=1).squeeze(0)
+        return features
+    def cpu(self):
+        """Move tensor to CPU."""
+        if self.tensor is not None:
+            self.tensor = self.tensor.cpu()
+        return self
+    def detach(self):
+        """Detach tensor from computation graph."""
+        if self.tensor is not None:
+            self.tensor = self.tensor.detach()
+        return self

wisent/core/activations/activations_collector.py CHANGED Viewed

@@ -1,12 +1,16 @@
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import Sequence
+from typing import Sequence, TYPE_CHECKING
 import torch
 from wisent.core.contrastive_pairs.core.pair import ContrastivePair
 from wisent.core.activations.core.atoms import LayerActivations, ActivationAggregationStrategy, LayerName, RawActivationMap
-from wisent.core.models.wisent_model import WisentModel
+from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
+if TYPE_CHECKING:
+    from wisent.core.models.wisent_model import WisentModel
 __all__ = ["ActivationCollector"]
 @dataclass(slots=True)
@@ -125,22 +129,23 @@ class ActivationCollector:
                     }
     """
-    model: WisentModel
+    model: "WisentModel"
     store_device: str | torch.device = "cpu"
     dtype: torch.dtype | None = None
     def collect_for_pair(
         self,
         pair: ContrastivePair,
-        layers: Sequence[LayerName] | None = None,
+        layers: Sequence[LayerName] | None = None,
         aggregation: ActivationAggregationStrategy = ActivationAggregationStrategy.CONTINUATION_TOKEN,
         return_full_sequence: bool = False,
         normalize_layers: bool = False,
+        prompt_strategy: PromptConstructionStrategy = PromptConstructionStrategy.CHAT_TEMPLATE,
     ) -> ContrastivePair:
         pos = self._collect_for_texts(pair.prompt, _resp_text(pair.positive_response),
-                                      layers, aggregation, return_full_sequence, normalize_layers)
+                                      layers, aggregation, return_full_sequence, normalize_layers, prompt_strategy)
         neg = self._collect_for_texts(pair.prompt, _resp_text(pair.negative_response),
-                                      layers, aggregation, return_full_sequence, normalize_layers)
+                                      layers, aggregation, return_full_sequence, normalize_layers, prompt_strategy)
         return pair.with_activations(positive=pos, negative=neg)
     def _collect_for_texts(
@@ -151,25 +156,16 @@ class ActivationCollector:
         aggregation: ActivationAggregationStrategy,
         return_full_sequence: bool,
         normalize_layers: bool = False,
+        prompt_strategy: PromptConstructionStrategy = PromptConstructionStrategy.CHAT_TEMPLATE,
     ) -> LayerActivations:
         self._ensure_eval_mode()
         with torch.inference_mode():
             tok = self.model.tokenizer # type: ignore[union-attr]
-            if not hasattr(tok, "apply_chat_template"):
-                raise RuntimeError("Tokenizer has no apply_chat_template; set it up or use a non-chat path.")
-            # 1) Build templated strings
-            prompt_text = tok.apply_chat_template(
-                [{"role": "user", "content": prompt}],
-                tokenize=False,
-                add_generation_prompt=True,
-            )
-            full_text = tok.apply_chat_template(
-                [{"role": "user", "content": prompt},
-                {"role": "assistant", "content": response}],
-                tokenize=False,
-                add_generation_prompt=False,
+            # 1) Build prompts based on strategy
+            prompt_text, full_text = self._build_prompts_for_strategy(
+                prompt, response, prompt_strategy, tok
             )
             # 2) Tokenize both with identical flags
@@ -217,6 +213,61 @@ class ActivationCollector:
                 activation_aggregation_strategy=None if return_full_sequence else aggregation,
             )
+    def _build_prompts_for_strategy(
+        self,
+        prompt: str,
+        response: str,
+        strategy: PromptConstructionStrategy,
+        tokenizer
+    ) -> tuple[str, str]:
+        """
+        Build prompt_text and full_text based on the chosen prompt construction strategy.
+        Returns:
+            (prompt_text, full_text): Tuple of prompt-only text and prompt+response text
+        """
+        if strategy == PromptConstructionStrategy.CHAT_TEMPLATE:
+            # Use model's built-in chat template
+            if not hasattr(tokenizer, "apply_chat_template"):
+                raise RuntimeError("Tokenizer has no apply_chat_template; set it up or use a different strategy.")
+            prompt_text = tokenizer.apply_chat_template(
+                [{"role": "user", "content": prompt}],
+                tokenize=False,
+                add_generation_prompt=True,
+            )
+            full_text = tokenizer.apply_chat_template(
+                [{"role": "user", "content": prompt},
+                 {"role": "assistant", "content": response}],
+                tokenize=False,
+                add_generation_prompt=False,
+            )
+        elif strategy == PromptConstructionStrategy.DIRECT_COMPLETION:
+            # Q → good_resp/bad_resp (direct answer)
+            prompt_text = prompt
+            full_text = f"{prompt} {response}"
+        elif strategy == PromptConstructionStrategy.INSTRUCTION_FOLLOWING:
+            # [INST] Q [/INST] → good_resp/bad_resp (instruction format)
+            prompt_text = f"[INST] {prompt} [/INST]"
+            full_text = f"[INST] {prompt} [/INST] {response}"
+        elif strategy == PromptConstructionStrategy.MULTIPLE_CHOICE:
+            # Which is better: Q A. bad B. good → "A"/"B" (choice format)
+            # For multiple choice, we expect response to be "A" or "B"
+            prompt_text = f"Which is better: {prompt} A. [bad response] B. [good response]\nAnswer:"
+            full_text = f"{prompt_text} {response}"
+        elif strategy == PromptConstructionStrategy.ROLE_PLAYING:
+            # Behave like person who would answer Q with good_resp → "I" (role assumption)
+            prompt_text = f"Behave like a person who would answer '{prompt}' with '{response}'. Say 'I' to confirm:"
+            full_text = f"{prompt_text} I"
+        else:
+            raise ValueError(f"Unknown prompt construction strategy: {strategy}")
+        return prompt_text, full_text
     def _select_indices(self, layer_names: Sequence[str] | None, n_blocks: int) -> list[int]:
         """Map layer names '1'..'L' -> indices 0..L-1."""
         if not layer_names:

wisent/core/activations/prompt_construction_strategy.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Prompt construction strategies for activation collection."""
+from enum import Enum
+class PromptConstructionStrategy(Enum):
+    """
+    Strategies for constructing prompts from question-answer pairs.
+    These strategies determine how the prompt and response are formatted
+    before being passed to the model for activation extraction.
+    """
+    MULTIPLE_CHOICE = "multiple_choice"
+    """
+    Format: Which is better: Q A. bad B. good → "A"/"B" (choice format)
+    Example: "Which is better: What is 2+2? A. 5 B. 4"
+    Response: "A" or "B"
+    """
+    ROLE_PLAYING = "role_playing"
+    """
+    Format: Behave like person who would answer Q with good_resp → "I" (role assumption)
+    Example: "Behave like a person who would answer 'What is 2+2?' with '4'"
+    Response: "I"
+    """
+    DIRECT_COMPLETION = "direct_completion"
+    """
+    Format: Q → good_resp/bad_resp (direct answer)
+    Example: "What is 2+2?"
+    Response: "4" or "5"
+    """
+    INSTRUCTION_FOLLOWING = "instruction_following"
+    """
+    Format: [INST] Q [/INST] → good_resp/bad_resp (instruction format)
+    Example: "[INST] What is 2+2? [/INST]"
+    Response: "4" or "5"
+    """
+    CHAT_TEMPLATE = "chat_template"
+    """
+    Format: Uses the model's built-in chat template
+    Example: <|start_header_id|>user<|end_header_id|>What is 2+2?<|eot_id|>
+    Response: Model's chat-formatted response
+    """

wisent/core/agent/__init__.py CHANGED Viewed

@@ -1,18 +1 @@
-"""
-Agent module for wisent-guard autonomous systems.
-This module provides:
-- ResponseDiagnostics: Response analysis and quality assessment
-- ResponseSteering: Response improvement and steering
-- Data classes for analysis and improvement results
-"""
-from .diagnose import ResponseDiagnostics, AnalysisResult
-from .steer import ResponseSteering, ImprovementResult
-__all__ = [
-    'ResponseDiagnostics',
-    'AnalysisResult',
-    'ResponseSteering',
-    'ImprovementResult'
-]
+# Empty __init__.py to avoid cascading import errors with empty __init__ pattern

wisent/core/agent/budget.py CHANGED Viewed

@@ -276,7 +276,7 @@ class BudgetManager:
                     return estimate_task_time("benchmark_eval", 100)
             except Exception as e:
-                raise RuntimeError(f"Device benchmark estimate failed for task '{task_name}': {e}. Run device benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
+                raise RuntimeError(f"Device benchmark estimate failed for task '{task_name}': {e}. Run device benchmark first with: python -m wisent.core.agent.budget benchmark")
         elif resource_type == ResourceType.MEMORY:
             raise RuntimeError(f"Memory estimation not implemented for task '{task_name}'")
@@ -348,7 +348,7 @@ def calculate_max_tasks_for_time_budget(task_type: str = "benchmark_evaluation",
         return max_tasks
     except Exception as e:
-        raise RuntimeError(f"Budget calculation failed for task '{task_type}': {e}. Run device benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
+        raise RuntimeError(f"Budget calculation failed for task '{task_type}': {e}. Run device benchmark first with: python -m wisent.core.agent.budget benchmark")
 def optimize_tasks_for_budget(task_candidates: List[str],

wisent/core/agent/device_benchmarks.py CHANGED Viewed

@@ -629,7 +629,7 @@ except Exception as e:
         """
         benchmark = self.get_current_benchmark()
         if not benchmark:
-            raise RuntimeError(f"No benchmark available for device. Run benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
+            raise RuntimeError(f"No benchmark available for device. Run benchmark first with: python -m wisent.core.agent.budget benchmark")
         else:
             # Use actual benchmark results
             if task_type == "model_loading":

wisent/core/agent/diagnose/__init__.py CHANGED Viewed

@@ -1,55 +1 @@
-"""
-Diagnostic module for autonomous agent.
-This module provides:
-- Classifier selection and auto-discovery
-- On-the-fly classifier creation
-- Response analysis and quality assessment
-"""
-# Response diagnostics
-from .response_diagnostics import ResponseDiagnostics, AnalysisResult
-# Classifier management
-from .select_classifiers import ClassifierSelector, ClassifierInfo, SelectionCriteria, auto_select_classifiers_for_agent
-from .create_classifier import ClassifierCreator, TrainingConfig, TrainingResult, create_classifier_on_demand
-# New marketplace system
-from .classifier_marketplace import (
-    ClassifierMarketplace,
-    ClassifierListing,
-    ClassifierCreationEstimate
-)
-# Agent decision system
-from .agent_classifier_decision import (
-    AgentClassifierDecisionSystem,
-    TaskAnalysis,
-    ClassifierDecision
-)
-__all__ = [
-    # Response diagnostics
-    'ResponseDiagnostics',
-    'AnalysisResult',
-    # Legacy classifier management (for backward compatibility)
-    'ClassifierSelector',
-    'ClassifierInfo',
-    'SelectionCriteria',
-    'auto_select_classifiers_for_agent',
-    'ClassifierCreator',
-    'TrainingConfig',
-    'TrainingResult',
-    'create_classifier_on_demand',
-    # New marketplace system
-    'ClassifierMarketplace',
-    'ClassifierListing',
-    'ClassifierCreationEstimate',
-    # Agent decision system
-    'AgentClassifierDecisionSystem',
-    'TaskAnalysis',
-    'ClassifierDecision'
-]
+# Empty __init__.py to avoid cascading import errors with empty __init__ pattern

wisent/core/agent/diagnose/classifier_marketplace.py CHANGED Viewed

@@ -53,9 +53,9 @@ class ClassifierMarketplace:
         self.search_paths = search_paths or [
             "./models/",
             "./classifiers/",
-            "./wisent_guard/models/",
-            "./wisent_guard/classifiers/",
-            "./wisent_guard/core/classifiers/"
+            "./wisent/models/",
+            "./wisent/classifiers/",
+            "./wisent/core/classifiers/"
         ]
         self.available_classifiers: List[ClassifierListing] = []
         self._training_time_cache = {}
@@ -75,8 +75,8 @@ class ClassifierMarketplace:
             if not os.path.exists(search_path):
                 continue
-            # For wisent_guard/core/classifiers, search recursively for the nested structure
-            if "wisent_guard/core/classifiers" in search_path:
+            # For wisent/core/classifiers, search recursively for the nested structure
+            if "wisent/core/classifiers" in search_path:
                 import glob
                 pattern = os.path.join(search_path, "**", "*.pkl")
                 classifier_files = glob.glob(pattern, recursive=True)
@@ -163,9 +163,9 @@ class ClassifierMarketplace:
         """Parse layer and issue type from filename."""
         filename = os.path.basename(filepath).lower()
-        # Check if this is from wisent_guard/core/classifiers with nested structure
-        if "wisent_guard/core/classifiers" in filepath:
-            # Extract from path structure: wisent_guard/core/classifiers/{model}/{benchmark}/layer_{layer}.pkl
+        # Check if this is from wisent/core/classifiers with nested structure
+        if "wisent/core/classifiers" in filepath:
+            # Extract from path structure: wisent/core/classifiers/{model}/{benchmark}/layer_{layer}.pkl
             path_parts = filepath.split(os.sep)
             # Find the benchmark name (second to last directory)

wisent/core/agent/diagnose/response_diagnostics.py CHANGED Viewed

@@ -11,11 +11,11 @@ This module handles:
 from dataclasses import dataclass
 from typing import Any, Dict, List
-from wisent.core.activations import ActivationAggregationStrategy, Activations
+from wisent.core.activations.core.atoms import ActivationAggregationStrategy
+from wisent.core.activations.activations import Activations
 from wisent.core.classifier.classifier import Classifier
-from ...layer import Layer
-from ...model import Model
+from wisent.core.layer import Layer
+from wisent.core.model import Model
 @dataclass

wisent/core/agent/diagnose/synthetic_classifier_option.py CHANGED Viewed

@@ -193,7 +193,7 @@ class SyntheticClassifierFactory:
             logging.info("Starting classifier training...")
             try:
                 # Convert activations to the format expected by train_on_activations method
-                from wisent.core.activations import Activations
+                from wisent.core.activations.activations import Activations
                 # Convert torch tensors to Activations objects if needed
                 harmful_activations = []

wisent/core/agent/diagnose/tasks/task_manager.py CHANGED Viewed

@@ -331,7 +331,7 @@ def handle_configurable_group_task(task_name: str):
     # Look for existing YAML files in common directories
     yaml_candidates = []
     search_dirs = [
-        "wisent_guard/parameters/tasks",
+        "wisent/parameters/tasks",
         ".",
         "tasks",
         "configs"
@@ -891,7 +891,7 @@ def save_custom_task_yaml(task_name: str, yaml_content: str) -> Optional[str]:
     """
     try:
         # Create the tasks directory if it doesn't exist
-        tasks_dir = os.path.join("wisent_guard", "parameters", "tasks")
+        tasks_dir = os.path.join("wisent", "parameters", "tasks")
         os.makedirs(tasks_dir, exist_ok=True)
         # Save the YAML content to a file
@@ -993,7 +993,7 @@ def create_flan_held_in_files() -> Optional[str]:
     """
     try:
         # Create the tasks directory
-        tasks_dir = os.path.join("wisent_guard", "parameters", "tasks")
+        tasks_dir = os.path.join("wisent", "parameters", "tasks")
         os.makedirs(tasks_dir, exist_ok=True)
         # Create the template file first

wisent/core/agent/diagnose.py CHANGED Viewed

@@ -11,7 +11,8 @@ This module handles:
 from dataclasses import dataclass
 from typing import Any, Dict, List
-from wisent.core.activations import ActivationAggregationStrategy, Activations
+from wisent.core.activations.core.atoms import ActivationAggregationStrategy
+from wisent.core.activations.activations import Activations
 from wisent.core.classifier.classifier import Classifier
 from ..layer import Layer

wisent/core/autonomous_agent.py CHANGED Viewed

@@ -12,7 +12,8 @@ A model that can autonomously use wisent-guard capabilities on itself:
 import asyncio
 from typing import Any, Dict, List, Optional
-from wisent.core.activations import ActivationAggregationStrategy, Activations
+from wisent.core.activations.core.atoms import ActivationAggregationStrategy
+from wisent.core.activations.activations import Activations
 from .agent.diagnose import AgentClassifierDecisionSystem, AnalysisResult, ClassifierMarketplace, ResponseDiagnostics
 from .agent.steer import ImprovementResult, ResponseSteering
@@ -768,9 +769,16 @@ class AutonomousAgent:
             if not classifier_config:
                 return None
+            # Validate required classifier configuration
+            if "layer" not in classifier_config:
+                raise ValueError(
+                    "Classifier configuration must include 'layer' parameter. "
+                    "Please ensure your classifier configuration file specifies the optimal layer."
+                )
             # Create ClassifierParams from stored data
             params = ClassifierParams(
-                optimal_layer=classifier_config.get("layer", 15),
+                optimal_layer=classifier_config["layer"],
                 classification_threshold=classifier_config.get("threshold", 0.5),
                 training_samples=classifier_config.get("samples", 25),
                 classifier_type=classifier_config.get("type", "logistic"),

wisent 0.5.12__py3-none-any.whl → 0.5.14__py3-none-any.whl

Potentially problematic release.

wisent 0.5.12__py3-none-any.whl → 0.5.14__py3-none-any.whl