PyPI - wisent - Versions diffs - 0.7.379__py3-none-any.whl - Mend

wisent 0.7.379__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1720) hide show

wisent/core/activations/core/atoms.py ADDED Viewed

@@ -0,0 +1,219 @@
+from __future__ import annotations
+from enum import Enum, auto, unique
+from typing import Mapping, Iterator, TypeAlias
+import numpy as np
+import torch
+import sys
+from wisent.core.errors import UnknownTypeError
+# Python 3.10 compatibility
+if sys.version_info >= (3, 11):
+    # enum.StrEnum is part of the standard library from Python 3.11 onwards.
+    from enum import StrEnum
+else:
+    class StrEnum(str, Enum):
+        """StrEnum backport for Python < 3.11.
+        Members are 'str' instances. Values requested with auto() are the
+        lower-cased member name, and str(member) returns the member's value.
+        """
+        # Hook consulted by the enum machinery for auto(); only 'name' is used.
+        def _generate_next_value_(name, start, count, last_values):
+            return name.lower()
+        # Return the plain value instead of the default 'ClassName.MEMBER' text.
+        def __str__(self) -> str:
+            return str(self.value)
+__all__ = ["LayerActivations", "ActivationAggregationStrategy", "ActivationCollector", "LayerName", "LayerActivation", "ActivationMap", "RawActivationMap"]
+LayerName: TypeAlias = str
+LayerActivation: TypeAlias = torch.Tensor | None
+ActivationMap: TypeAlias = Mapping[LayerName, LayerActivation]
+RawActivationMap: TypeAlias = Mapping[LayerName, torch.Tensor | np.ndarray | None]
+class _LowerSnakeStrEnum(StrEnum):
+    """StrEnum whose auto() values are the member name converted to lower case.
+    With UPPER_SNAKE_CASE member names this yields lower_snake_case values.
+    """
+    # Hook consulted by the enum machinery for auto(); only 'name' is used.
+    def _generate_next_value_(name, start, count, last_values): # type: ignore
+        return name.lower()
+@unique
+class ActivationAggregationStrategy(_LowerSnakeStrEnum):
+    """Strategies for selecting/aggregating tokens in activation extraction.
+    """
+    CHOICE_TOKEN = auto()         # target A/B choice tokens (multiple choice)
+    CONTINUATION_TOKEN = auto()   # first token of the continuation
+    LAST_TOKEN = auto()           # always use the last token
+    FIRST_TOKEN = auto()          # always use the first token
+    MEAN_POOLING = auto()         # mean over all tokens
+    MAX_POOLING = auto()          # max over all tokens
+    @property
+    def description(self) -> str:
+        return {
+            ActivationAggregationStrategy.CHOICE_TOKEN: "Target A/B choice tokens (multiple choice).",
+            ActivationAggregationStrategy.CONTINUATION_TOKEN: "Use the first token of the continuation.",
+            ActivationAggregationStrategy.LAST_TOKEN: "Always select the last token.",
+            ActivationAggregationStrategy.FIRST_TOKEN: "Always select the first token.",
+            ActivationAggregationStrategy.MEAN_POOLING: "Aggregate by mean over all tokens.",
+            ActivationAggregationStrategy.MAX_POOLING: "Aggregate by max over all tokens.",
+        }[self]
+class LayerActivations(Mapping[LayerName, LayerActivation]):
+    """Immutable mapping of layer names to activations.
+    Behaves like: 'Mapping[str, torch.Tensor | None]'.
+    construction:
+        'LayerActivations(data: Mapping[str, torch.Tensor | np.ndarray | None] | None, *, dtype: torch.dtype | None = None)'
+        - 'torch.Tensor' values are kept as-is (or cast to 'dtype' if given).
+        - 'np.ndarray' values are converted via 'torch.from_numpy' (then cast if needed).
+        - 'None' values are preserved.
+        -  Missing/empty input yields an empty container.
+    atributes:
+        _data:
+            internal storage dict. It contains information about layer activations.
+        _strategy:
+            'ActivationAggregationStrategy' (see below). Indicates how activations were aggregated if applicable.
+    methods:
+        'summary()':
+            dict with per-layer shape/dtype/device/requires_grad.
+        'to(*args, **kwargs)':
+            apply 'Tensor.to' to all non-'None' values.
+        'cpu()', 'detach()':
+            convenience operations.
+        'numpy()':
+            map tensors to cpu NumPy arrays (others to 'None').
+        'to_dict()':
+            plain dict (useful for (de)serialization).
+    examples:
+        >>> acts = LayerActivations({"layer1": torch.randn(2, 10, 768), "layer2": None}, activation_aggregation_strategy="mean_pooling")
+        >>> acts["layer1"].shape
+        torch.Size([2, 10, 768])
+        >>> acts["layer2"] is None
+        True
+        >>> acts.activation_aggregation_strategy
+        <ActivationAggregationStrategy.MEAN_POOLING: 'mean_pooling'>
+        >>> acts.summary()
+        {'layer1': {'shape': (2, 10, 768), 'dtype': 'torch.float32', 'device': 'cpu', 'requires_grad': False}, 'layer2': {'shape': None, 'dtype': None, 'device': None, 'requires_grad': None}}
+        >>> acts.numpy()
+        {'layer1': array(...), 'layer2': None}
+        >>> acts.to("cuda")
+        LayerActivations(
+          layer1: Tensor(shape=(2, 10, 768), dtype=torch.float32, device=cuda:0)
+          layer2: None
+        )
+        >>> acts.detach()  # if any tensor required grad
+        LayerActivations(
+          layer1: Tensor(shape=(2, 10, 768), dtype=torch.float32, device=cpu)
+          layer2: None
+        )
+    notes:
+        - Use 'summary()' or 'numpy()' if you need JSON-serializable content.
+        - Keys are strings by convention; enforced by type hints.
+    """
+    __slots__ = ("_data", "_strategy")
+    def __init__(self, data: RawActivationMap | None = None, activation_aggregation_strategy: ActivationAggregationStrategy | None = None, dtype: torch.dtype | None = None):
+        store: dict[LayerName, LayerActivation] = {}
+        if data:
+            for layer, val in data.items():
+                if val is None:
+                    store[layer] = None
+                elif isinstance(val, torch.Tensor):
+                    store[layer] = val if dtype is None else val.to(dtype)
+                elif isinstance(val, np.ndarray):
+                    t = torch.from_numpy(val)
+                    store[layer] = t if dtype is None else t.to(dtype)
+                else:
+                    raise TypeError(
+                        f"Activations for layer '{layer}' must be torch.Tensor, np.ndarray, or None."
+                    )
+        self._data = store
+        self._strategy = self._normalize_strategy(activation_aggregation_strategy)
+    @staticmethod
+    def _normalize_strategy(
+        s: ActivationAggregationStrategy | str | None
+    ) -> ActivationAggregationStrategy | None:
+        if s is None:
+            return None
+        if isinstance(s, ActivationAggregationStrategy):
+            return s
+        if isinstance(s, str):
+            try:
+                return ActivationAggregationStrategy(s)
+            except ValueError:
+                valid = [e.value for e in ActivationAggregationStrategy]
+                raise UnknownTypeError(
+                    entity_type="activation_agregation_strategy",
+                    value=s,
+                    valid_values=valid
+                )
+        raise TypeError(
+            "activation_agregation_strategy must be ActivationAggregationStrategy | str | None"
+        )
+    @property
+    def activation_aggregation_strategy(self) -> ActivationAggregationStrategy | None:
+        return self._strategy
+    def __getitem__(self, key: LayerName) -> LayerActivation:
+        return self._data[key]
+    def __iter__(self) -> Iterator[LayerName]:
+        return iter(self._data)
+    def __len__(self) -> int:
+        return len(self._data)
+    def summary(self) -> dict[LayerName, dict[str, tuple | str | bool | None]]:
+        ''' Return a summary of the activations. For each layer, provides
+        shape, dtype, device, requires_grad status, and aggregation strategy.
+        '''
+        out: dict[LayerName, dict[str, dict[str, tuple | str | bool | None]]] = {}
+        for k, v in self._data.items():
+            if isinstance(v, torch.Tensor):
+                out[k] = {
+                    "shape": tuple(v.shape),
+                    "dtype": str(v.dtype),
+                    "device": str(v.device),
+                    "requires_grad": bool(v.requires_grad),
+                }
+            else:
+                out[k] = {"shape": None, "dtype": None, "device": None, "requires_grad": None}
+        out["_activation_aggregation_strategy"] = {"strategy": self._strategy.value if self._strategy else None}
+        return out
+    def numpy(self) -> dict[LayerName, np.ndarray | None]:
+        return {k: (v.detach().cpu().numpy() if isinstance(v, torch.Tensor) else None)
+                for k, v in self._data.items()}
+    def to_dict(self) -> dict[LayerName, LayerActivation]:
+        return dict(self._data)
+    def to(self, *args, **kwargs) -> LayerActivations:
+        return LayerActivations({k: (v.to(*args, **kwargs) if isinstance(v, torch.Tensor) else None)
+                                 for k, v in self._data.items()})
+    def detach(self) -> LayerActivations:
+        return LayerActivations({k: (v.detach() if isinstance(v, torch.Tensor) else None)
+                                 for k, v in self._data.items()})
+    def cpu(self) -> LayerActivations:
+        return self.to("cpu")
+    def __repr__(self) -> str:
+        lines = ["LayerActivations("]
+        for k, v in self._data.items():
+            if isinstance(v, torch.Tensor):
+                lines.append(
+                    f"  {k}: Tensor(shape={tuple(v.shape)}, dtype={v.dtype}, device={v.device})"
+                )
+            else:
+                lines.append(f"  {k}: None")
+        lines.append(")")
+        lines.append(f"  _activation_aggregation_strategy: {self._strategy.value if self._strategy else None}")
+        return "\n".join(lines)

wisent/core/activations/prompt_construction_strategy.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Prompt construction strategies for activation collection."""
+from enum import Enum
+class PromptConstructionStrategy(Enum):
+    """
+    Strategies for constructing prompts from question-answer pairs.
+    These strategies determine how the prompt and response are formatted
+    before being passed to the model for activation extraction.
+    """
+    MULTIPLE_CHOICE = "multiple_choice"
+    """
+    Format: Which is better: Q A. bad B. good → "A"/"B" (choice format)
+    Example: "Which is better: What is 2+2? A. 5 B. 4"
+    Response: "A" or "B"
+    """
+    ROLE_PLAYING = "role_playing"
+    """
+    Format: Behave like person who would answer Q with good_resp → "I" (role assumption)
+    Example: "Behave like a person who would answer 'What is 2+2?' with '4'"
+    Response: "I"
+    """
+    DIRECT_COMPLETION = "direct_completion"
+    """
+    Format: Q → good_resp/bad_resp (direct answer)
+    Example: "What is 2+2?"
+    Response: "4" or "5"
+    """
+    INSTRUCTION_FOLLOWING = "instruction_following"
+    """
+    Format: [INST] Q [/INST] → good_resp/bad_resp (instruction format)
+    Example: "[INST] What is 2+2? [/INST]"
+    Response: "4" or "5"
+    """
+    CHAT_TEMPLATE = "chat_template"
+    """
+    Format: Uses the model's built-in chat template
+    Example: <|start_header_id|>user<|end_header_id|>What is 2+2?<|eot_id|>
+    Response: Model's chat-formatted response
+    """

wisent/core/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""
+Multi-modal adapters for Wisent contrastive steering.
+Adapters provide a unified interface for different modalities (text, audio, video, robotics)
+while keeping the core steering logic modality-agnostic.
+"""
+from wisent.core.adapters.base import BaseAdapter, AdapterError
+from wisent.core.adapters.text import TextAdapter
+from wisent.core.adapters.audio import AudioAdapter
+from wisent.core.adapters.video import VideoAdapter
+from wisent.core.adapters.robotics import RoboticsAdapter
+from wisent.core.adapters.multimodal import MultimodalAdapter
+# Public API of the adapters package: the base class, its error type and one
+# adapter per modality. Every name listed here is imported above.
+__all__ = [
+    "BaseAdapter",
+    "AdapterError",
+    "TextAdapter",
+    "AudioAdapter",
+    "VideoAdapter",
+    "RoboticsAdapter",
+    "MultimodalAdapter",
+]