wisent 0.1.1__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of wisent might be problematic.

Files changed (237)
  1. wisent/__init__.py +1 -8
  2. wisent/benchmarks/__init__.py +0 -0
  3. wisent/benchmarks/coding/__init__.py +0 -0
  4. wisent/benchmarks/coding/metrics/__init__.py +0 -0
  5. wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
  6. wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
  7. wisent/benchmarks/coding/metrics/evaluator.py +275 -0
  8. wisent/benchmarks/coding/metrics/passk.py +66 -0
  9. wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
  10. wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
  11. wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
  12. wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
  13. wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
  14. wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
  15. wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
  16. wisent/benchmarks/coding/providers/__init__.py +18 -0
  17. wisent/benchmarks/coding/providers/core/__init__.py +0 -0
  18. wisent/benchmarks/coding/providers/core/atoms.py +31 -0
  19. wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
  20. wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
  21. wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
  22. wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
  23. wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
  24. wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
  25. wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
  26. wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
  27. wisent/classifiers/__init__.py +0 -0
  28. wisent/classifiers/core/__init__.py +0 -0
  29. wisent/classifiers/core/atoms.py +747 -0
  30. wisent/classifiers/models/__init__.py +0 -0
  31. wisent/classifiers/models/logistic.py +29 -0
  32. wisent/classifiers/models/mlp.py +47 -0
  33. wisent/cli/__init__.py +0 -0
  34. wisent/cli/classifiers/__init__.py +0 -0
  35. wisent/cli/classifiers/classifier_rotator.py +137 -0
  36. wisent/cli/cli_logger.py +142 -0
  37. wisent/cli/data_loaders/__init__.py +0 -0
  38. wisent/cli/data_loaders/data_loader_rotator.py +96 -0
  39. wisent/cli/evaluators/__init__.py +0 -0
  40. wisent/cli/evaluators/evaluator_rotator.py +148 -0
  41. wisent/cli/steering_methods/__init__.py +0 -0
  42. wisent/cli/steering_methods/steering_rotator.py +110 -0
  43. wisent/cli/wisent_cli/__init__.py +0 -0
  44. wisent/cli/wisent_cli/commands/__init__.py +0 -0
  45. wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
  46. wisent/cli/wisent_cli/commands/listing.py +154 -0
  47. wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
  48. wisent/cli/wisent_cli/main.py +93 -0
  49. wisent/cli/wisent_cli/shell.py +80 -0
  50. wisent/cli/wisent_cli/ui.py +69 -0
  51. wisent/cli/wisent_cli/util/__init__.py +0 -0
  52. wisent/cli/wisent_cli/util/aggregations.py +43 -0
  53. wisent/cli/wisent_cli/util/parsing.py +126 -0
  54. wisent/cli/wisent_cli/version.py +4 -0
  55. wisent/core/__init__.py +27 -0
  56. wisent/core/activations/__init__.py +0 -0
  57. wisent/core/activations/activations_collector.py +338 -0
  58. wisent/core/activations/core/__init__.py +0 -0
  59. wisent/core/activations/core/atoms.py +216 -0
  60. wisent/core/agent/__init__.py +18 -0
  61. wisent/core/agent/budget.py +638 -0
  62. wisent/core/agent/device_benchmarks.py +685 -0
  63. wisent/core/agent/diagnose/__init__.py +55 -0
  64. wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
  65. wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
  66. wisent/core/agent/diagnose/create_classifier.py +1154 -0
  67. wisent/core/agent/diagnose/response_diagnostics.py +268 -0
  68. wisent/core/agent/diagnose/select_classifiers.py +506 -0
  69. wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
  70. wisent/core/agent/diagnose/tasks/__init__.py +33 -0
  71. wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
  72. wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
  73. wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
  74. wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
  75. wisent/core/agent/diagnose.py +242 -0
  76. wisent/core/agent/steer.py +212 -0
  77. wisent/core/agent/timeout.py +134 -0
  78. wisent/core/autonomous_agent.py +1234 -0
  79. wisent/core/bigcode_integration.py +583 -0
  80. wisent/core/contrastive_pairs/__init__.py +15 -0
  81. wisent/core/contrastive_pairs/core/__init__.py +0 -0
  82. wisent/core/contrastive_pairs/core/atoms.py +45 -0
  83. wisent/core/contrastive_pairs/core/buliders.py +59 -0
  84. wisent/core/contrastive_pairs/core/pair.py +178 -0
  85. wisent/core/contrastive_pairs/core/response.py +152 -0
  86. wisent/core/contrastive_pairs/core/serialization.py +300 -0
  87. wisent/core/contrastive_pairs/core/set.py +133 -0
  88. wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
  89. wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
  90. wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
  91. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
  92. wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
  93. wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
  94. wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
  95. wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
  96. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
  102. wisent/core/data_loaders/__init__.py +0 -0
  103. wisent/core/data_loaders/core/__init__.py +0 -0
  104. wisent/core/data_loaders/core/atoms.py +98 -0
  105. wisent/core/data_loaders/loaders/__init__.py +0 -0
  106. wisent/core/data_loaders/loaders/custom.py +120 -0
  107. wisent/core/data_loaders/loaders/lm_loader.py +218 -0
  108. wisent/core/detection_handling.py +257 -0
  109. wisent/core/download_full_benchmarks.py +1386 -0
  110. wisent/core/evaluators/__init__.py +0 -0
  111. wisent/core/evaluators/oracles/__init__.py +0 -0
  112. wisent/core/evaluators/oracles/interactive.py +73 -0
  113. wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
  114. wisent/core/evaluators/oracles/user_specified.py +67 -0
  115. wisent/core/hyperparameter_optimizer.py +429 -0
  116. wisent/core/lm_eval_harness_ground_truth.py +1396 -0
  117. wisent/core/log_likelihoods_evaluator.py +321 -0
  118. wisent/core/managed_cached_benchmarks.py +595 -0
  119. wisent/core/mixed_benchmark_sampler.py +364 -0
  120. wisent/core/model_config_manager.py +330 -0
  121. wisent/core/model_persistence.py +317 -0
  122. wisent/core/models/__init__.py +0 -0
  123. wisent/core/models/core/__init__.py +0 -0
  124. wisent/core/models/core/atoms.py +460 -0
  125. wisent/core/models/wisent_model.py +727 -0
  126. wisent/core/multi_steering.py +316 -0
  127. wisent/core/optuna/__init__.py +57 -0
  128. wisent/core/optuna/classifier/__init__.py +25 -0
  129. wisent/core/optuna/classifier/activation_generator.py +349 -0
  130. wisent/core/optuna/classifier/classifier_cache.py +509 -0
  131. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
  132. wisent/core/optuna/steering/__init__.py +0 -0
  133. wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
  134. wisent/core/optuna/steering/data_utils.py +342 -0
  135. wisent/core/optuna/steering/metrics.py +474 -0
  136. wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
  137. wisent/core/optuna/steering/steering_optimization.py +1111 -0
  138. wisent/core/parser.py +1668 -0
  139. wisent/core/prompts/__init__.py +0 -0
  140. wisent/core/prompts/core/__init__.py +0 -0
  141. wisent/core/prompts/core/atom.py +57 -0
  142. wisent/core/prompts/core/prompt_formater.py +157 -0
  143. wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
  144. wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
  145. wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
  146. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
  147. wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
  148. wisent/core/representation.py +5 -0
  149. wisent/core/sample_size_optimizer.py +648 -0
  150. wisent/core/sample_size_optimizer_v2.py +355 -0
  151. wisent/core/save_results.py +277 -0
  152. wisent/core/steering.py +652 -0
  153. wisent/core/steering_method.py +26 -0
  154. wisent/core/steering_methods/__init__.py +0 -0
  155. wisent/core/steering_methods/core/__init__.py +0 -0
  156. wisent/core/steering_methods/core/atoms.py +153 -0
  157. wisent/core/steering_methods/methods/__init__.py +0 -0
  158. wisent/core/steering_methods/methods/caa.py +44 -0
  159. wisent/core/steering_optimizer.py +1297 -0
  160. wisent/core/task_interface.py +132 -0
  161. wisent/core/task_selector.py +189 -0
  162. wisent/core/tasks/__init__.py +175 -0
  163. wisent/core/tasks/aime_task.py +141 -0
  164. wisent/core/tasks/file_task.py +211 -0
  165. wisent/core/tasks/hle_task.py +180 -0
  166. wisent/core/tasks/hmmt_task.py +119 -0
  167. wisent/core/tasks/livecodebench_task.py +201 -0
  168. wisent/core/tasks/livemathbench_task.py +158 -0
  169. wisent/core/tasks/lm_eval_task.py +455 -0
  170. wisent/core/tasks/math500_task.py +84 -0
  171. wisent/core/tasks/polymath_task.py +146 -0
  172. wisent/core/tasks/supergpqa_task.py +220 -0
  173. wisent/core/time_estimator.py +149 -0
  174. wisent/core/timing_calibration.py +174 -0
  175. wisent/core/tracking/__init__.py +54 -0
  176. wisent/core/tracking/latency.py +618 -0
  177. wisent/core/tracking/memory.py +359 -0
  178. wisent/core/trainers/__init__.py +0 -0
  179. wisent/core/trainers/core/__init__.py +11 -0
  180. wisent/core/trainers/core/atoms.py +45 -0
  181. wisent/core/trainers/steering_trainer.py +271 -0
  182. wisent/core/user_model_config.py +158 -0
  183. wisent/opti/__init__.py +0 -0
  184. wisent/opti/core/__init__.py +0 -0
  185. wisent/opti/core/atoms.py +175 -0
  186. wisent/opti/methods/__init__.py +0 -0
  187. wisent/opti/methods/opti_classificator.py +172 -0
  188. wisent/opti/methods/opti_steering.py +138 -0
  189. wisent/synthetic/__init__.py +0 -0
  190. wisent/synthetic/cleaners/__init__.py +0 -0
  191. wisent/synthetic/cleaners/core/__init__.py +0 -0
  192. wisent/synthetic/cleaners/core/atoms.py +58 -0
  193. wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
  194. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  195. wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
  196. wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
  197. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  198. wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
  199. wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
  200. wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
  201. wisent/synthetic/db_instructions/__init__.py +0 -0
  202. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  203. wisent/synthetic/db_instructions/core/atoms.py +25 -0
  204. wisent/synthetic/db_instructions/mini_dp.py +37 -0
  205. wisent/synthetic/generators/__init__.py +0 -0
  206. wisent/synthetic/generators/core/__init__.py +0 -0
  207. wisent/synthetic/generators/core/atoms.py +73 -0
  208. wisent/synthetic/generators/diversities/__init__.py +0 -0
  209. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  210. wisent/synthetic/generators/diversities/core/core.py +68 -0
  211. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  212. wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
  213. wisent/synthetic/generators/pairs_generator.py +179 -0
  214. wisent-0.5.2.dist-info/METADATA +67 -0
  215. wisent-0.5.2.dist-info/RECORD +218 -0
  216. {wisent-0.1.1.dist-info → wisent-0.5.2.dist-info}/WHEEL +1 -1
  217. {wisent-0.1.1.dist-info → wisent-0.5.2.dist-info/licenses}/LICENSE +2 -2
  218. wisent/activations/__init__.py +0 -9
  219. wisent/activations/client.py +0 -97
  220. wisent/activations/extractor.py +0 -251
  221. wisent/activations/models.py +0 -95
  222. wisent/client.py +0 -45
  223. wisent/control_vector/__init__.py +0 -9
  224. wisent/control_vector/client.py +0 -85
  225. wisent/control_vector/manager.py +0 -168
  226. wisent/control_vector/models.py +0 -70
  227. wisent/inference/__init__.py +0 -9
  228. wisent/inference/client.py +0 -103
  229. wisent/inference/inferencer.py +0 -250
  230. wisent/inference/models.py +0 -66
  231. wisent/utils/__init__.py +0 -3
  232. wisent/utils/auth.py +0 -30
  233. wisent/utils/http.py +0 -228
  234. wisent/version.py +0 -3
  235. wisent-0.1.1.dist-info/METADATA +0 -142
  236. wisent-0.1.1.dist-info/RECORD +0 -23
  237. {wisent-0.1.1.dist-info → wisent-0.5.2.dist-info}/top_level.txt +0 -0
wisent/core/activations/activations_collector.py
@@ -0,0 +1,338 @@
+ from __future__ import annotations
+ from dataclasses import dataclass
+ from typing import Sequence
+ import torch
+
+
+ from wisent.core.contrastive_pairs.core.pair import ContrastivePair
+ from wisent.core.activations.core.atoms import LayerActivations, ActivationAggregationStrategy, LayerName, RawActivationMap
+ from wisent.core.models.wisent_model import WisentModel
+ __all__ = ["ActivationCollector"]
+
+ @dataclass(slots=True)
+ class ActivationCollector:
+     """
+     Collect per-layer activations for (prompt + response) using a chat template.
+
+     arguments:
+         model:
+             A :class:`WisentModel` wrapper providing the HF model and tokenizer.
+         store_device:
+             Device to store collected activations on (default "cpu").
+         dtype:
+             Optional torch.dtype to cast activations to (e.g., torch.float32).
+             If None, the original dtype is kept.
+
+     detailed explanation:
+
+         Let:
+         - L = 4 transformer blocks
+         - hidden size H = 256
+         - prompt tokenized length T_prompt = 14
+         - full sequence (prompt + response) tokenized length T_full = 22
+
+         Step 1: Build templated strings (NOT tokenized yet)
+             prompt_text = tok.apply_chat_template(
+                 [{"role": "user", "content": prompt}],
+                 tokenize=False, add_generation_prompt=True
+             )
+             full_text = tok.apply_chat_template(
+                 [{"role": "user", "content": prompt},
+                  {"role": "assistant", "content": response}],
+                 tokenize=False, add_generation_prompt=False
+             )
+
+         Step 2: Tokenize both with identical flags
+             prompt_enc = tok(prompt_text, return_tensors="pt", add_special_tokens=False)
+             full_enc = tok(full_text, return_tensors="pt", add_special_tokens=False)
+
+             Shapes:
+                 prompt_enc["input_ids"].shape == (1, T_prompt) == (1, 14)
+                 full_enc["input_ids"].shape == (1, T_full) == (1, 22)
+
+             Boundary:
+                 prompt_len = prompt_enc["input_ids"].shape[-1] == 14
+                 Continuation tokens in the full sequence start at index 14.
+
+         Step 3: Forward pass with hidden states
+             out = model.hf_model(**full_enc, output_hidden_states=True, use_cache=False)
+             hs = out.hidden_states
+
+             hs is a tuple of length L + 1 (the embedding layer sits at index 0):
+                 len(hs) == 5 -> indices: 0 = embeddings, 1..4 = blocks
+             Each hs[i].shape == (1, T_full, H) == (1, 22, 256)
+
+             We map layer names "1".."L" to hs[1]..hs[L]:
+                 "1" -> hs[1], "2" -> hs[2], ..., "4" -> hs[4]
+
+         Step 4: Per-layer extraction
+             For a chosen layer i (1-based), take hs[i].squeeze(0) -> shape (T_full, H) == (22, 256)
+
+             If return_full_sequence=True:
+                 store the value with shape (T_full, H) == (22, 256)
+             Else (aggregate to a single vector [H]):
+             - CONTINUATION_TOKEN: take the first continuation token -> cont[0] -> (H,)
+             - CHOICE_TOKEN: take the token at index prompt_len + 1 (falling back to the last token) -> (H,)
+             - FIRST_TOKEN: layer_seq[0] -> (H,)
+             - LAST_TOKEN: layer_seq[-1] -> (H,)
+             - MEAN_POOLING: cont.mean(0) -> (H,)
+             - MAX_POOLING: cont.max(0)[0] -> (H,)
+
+             where:
+                 layer_seq = hs[i].squeeze(0)   # (22, 256)
+                 cont_start = prompt_len = 14
+                 cont = layer_seq[14:]          # (22-14=8, 256)
+
+         Step 5: Storage and return
+             - Each stored tensor is moved to 'store_device' (default "cpu") and cast to 'dtype'
+               if provided (e.g., float32).
+             - Keys are layer names: "1", "2", ..., "L".
+             - Results are wrapped into LayerActivations with `activation_aggregation_strategy`
+               set to the chosen strategy (or None if keeping full sequences).
+
+     examples:
+         Example usage (aggregated vectors per layer)
+         >>> collector = ActivationCollector(model=my_wrapper, store_device="cpu", dtype=torch.float32)
+         >>> updated_pair = collector.collect_for_pair(
+         ...     pair,
+         ...     layers=["1", "3"],  # subset (or None for all)
+         ...     aggregation=ActivationAggregationStrategy.CONTINUATION_TOKEN,
+         ...     return_full_sequence=False,
+         ... )
+         >>> pos_acts = updated_pair.positive_response.layers_activations
+         >>> pos_acts.summary()
+         {
+           '1': {'shape': (256,), 'dtype': 'torch.float32', 'device': 'cpu', 'requires_grad': False},
+           '3': {'shape': (256,), 'dtype': 'torch.float32', 'device': 'cpu', 'requires_grad': False},
+           '_activation_aggregation_strategy': {'strategy': 'continuation_token'}
+         }
+
+         Example usage (full sequences per layer)
+         >>> updated_pair = collector.collect_for_pair(
+         ...     pair,
+         ...     layers=None,  # all layers "1".."L"
+         ...     aggregation=ActivationAggregationStrategy.MEAN_POOLING,  # ignored when return_full_sequence=True
+         ...     return_full_sequence=True,
+         ... )
+         >>> neg_acts = updated_pair.negative_response.layers_activations
+         >>> # Suppose L=4, T_full=22, H=256
+         >>> neg_acts.summary()
+         {
+           '1': {'shape': (22, 256), 'dtype': 'torch.float32', 'device': 'cpu', 'requires_grad': False},
+           '2': {'shape': (22, 256), 'dtype': 'torch.float32', 'device': 'cpu', 'requires_grad': False},
+           '3': {'shape': (22, 256), 'dtype': 'torch.float32', 'device': 'cpu', 'requires_grad': False},
+           '4': {'shape': (22, 256), 'dtype': 'torch.float32', 'device': 'cpu', 'requires_grad': False},
+           '_activation_aggregation_strategy': {'strategy': None}
+         }
+     """
+
+     model: WisentModel
+     store_device: str | torch.device = "cpu"
+     dtype: torch.dtype | None = None
+
+     def collect_for_pair(
+         self,
+         pair: ContrastivePair,
+         layers: Sequence[LayerName] | None = None,
+         aggregation: ActivationAggregationStrategy = ActivationAggregationStrategy.CONTINUATION_TOKEN,
+         return_full_sequence: bool = False,
+         normalize_layers: bool = False,
+     ) -> ContrastivePair:
+         pos = self._collect_for_texts(pair.prompt, _resp_text(pair.positive_response),
+                                       layers, aggregation, return_full_sequence, normalize_layers)
+         neg = self._collect_for_texts(pair.prompt, _resp_text(pair.negative_response),
+                                       layers, aggregation, return_full_sequence, normalize_layers)
+         return pair.with_activations(positive=pos, negative=neg)
+
+     def _collect_for_texts(
+         self,
+         prompt: str,
+         response: str,
+         layers: Sequence[LayerName] | None,
+         aggregation: ActivationAggregationStrategy,
+         return_full_sequence: bool,
+         normalize_layers: bool = False,
+     ) -> LayerActivations:
+
+         self._ensure_eval_mode()
+         with torch.inference_mode():
+             tok = self.model.tokenizer  # type: ignore[union-attr]
+             if not hasattr(tok, "apply_chat_template"):
+                 raise RuntimeError("Tokenizer has no apply_chat_template; set it up or use a non-chat path.")
+
+             # 1) Build templated strings
+             prompt_text = tok.apply_chat_template(
+                 [{"role": "user", "content": prompt}],
+                 tokenize=False,
+                 add_generation_prompt=True,
+             )
+             full_text = tok.apply_chat_template(
+                 [{"role": "user", "content": prompt},
+                  {"role": "assistant", "content": response}],
+                 tokenize=False,
+                 add_generation_prompt=False,
+             )
+
+             # 2) Tokenize both with identical flags
+             prompt_enc = tok(prompt_text, return_tensors="pt", add_special_tokens=False)
+             full_enc = tok(full_text, return_tensors="pt", add_special_tokens=False)
+
+             # 3) Boundary from prompt-only tokens (CPU is fine)
+             prompt_len = int(prompt_enc["input_ids"].shape[-1])
+
+             # 4) Move only the batch that goes into the model
+             compute_device = getattr(self.model, "compute_device", None) or next(self.model.hf_model.parameters()).device
+             full_enc = {k: v.to(compute_device) for k, v in full_enc.items()}
+
+             # 5) Forward on the full sequence to get hidden states
+             out = self.model.hf_model(**full_enc, output_hidden_states=True, use_cache=False)
+             hs: tuple[torch.Tensor, ...] = out.hidden_states  # hs[0]=emb, hs[1:]=layers
+
+             if not hs:
+                 raise RuntimeError("No hidden_states returned; the model may not support output_hidden_states.")
+
+             n_blocks = len(hs) - 1
+             names_by_idx = [str(i) for i in range(1, n_blocks + 1)]
+
+             keep = self._select_indices(layers, n_blocks)
+             collected: dict[LayerName, torch.Tensor] = {}
+
+             for idx in keep:
+                 name = names_by_idx[idx]
+                 h = hs[idx + 1].squeeze(0)  # [1, T, H] -> [T, H]
+                 if return_full_sequence:
+                     value = h
+                 else:
+                     value = self._aggregate(h, aggregation, prompt_len)
+                 value = value.to(self.store_device)
+                 if self.dtype is not None:
+                     value = value.to(self.dtype)
+
+                 if normalize_layers:
+                     value = self._normalization(value)
+
+                 collected[name] = value
+
+             return LayerActivations(
+                 collected,
+                 activation_aggregation_strategy=None if return_full_sequence else aggregation,
+             )
+
+     def _select_indices(self, layer_names: Sequence[str] | None, n_blocks: int) -> list[int]:
+         """Map layer names '1'..'L' -> indices 0..L-1."""
+         if not layer_names:
+             return list(range(n_blocks))
+         out: list[int] = []
+         for name in layer_names:
+             try:
+                 i = int(name)
+             except ValueError:
+                 raise KeyError(f"Layer name must be a numeric string like '3', got {name!r}")
+             if not (1 <= i <= n_blocks):
+                 raise IndexError(f"Layer '{i}' out of range 1..{n_blocks}")
+             out.append(i - 1)
+         return sorted(set(out))
+
+     def _aggregate(
+         self,
+         layer_seq: torch.Tensor,  # [T, H]
+         aggregation: ActivationAggregationStrategy,
+         prompt_len: int,
+     ) -> torch.Tensor:  # [H]
+         if layer_seq.ndim != 2:
+             raise ValueError(f"Expected [seq_len, hidden_dim], got {tuple(layer_seq.shape)}")
+
+         # continuation = tokens after the prompt boundary
+         cont_start = min(max(prompt_len, 0), layer_seq.shape[0] - 1)
+         cont = layer_seq[cont_start:] if cont_start < layer_seq.shape[0] else layer_seq[-1:].contiguous()
+         if cont.numel() == 0:
+             cont = layer_seq[-1:].contiguous()
+
+         s = aggregation
+
+         if s is ActivationAggregationStrategy.CONTINUATION_TOKEN:
+             return cont[0]
+         elif s is ActivationAggregationStrategy.CHOICE_TOKEN:
+             choice_idx = prompt_len + 1
+             if choice_idx < layer_seq.shape[0]:
+                 return layer_seq[choice_idx]
+             else:
+                 return layer_seq[-1]
+         elif s is ActivationAggregationStrategy.FIRST_TOKEN:
+             return layer_seq[0]
+         elif s is ActivationAggregationStrategy.LAST_TOKEN:
+             return layer_seq[-1]
+         elif s is ActivationAggregationStrategy.MEAN_POOLING:
+             return cont.mean(dim=0)
+         elif s is ActivationAggregationStrategy.MAX_POOLING:
+             return cont.max(dim=0).values
+         else:
+             return cont[0]
+
+     def _normalization(
+         self,
+         x: torch.Tensor,
+         dim: int = -1,
+         eps: float = 1e-12,
+     ) -> torch.Tensor:
+         """
+         Safely L2-normalize 'x' along 'dim'.
+
+         arguments:
+             x:
+                 Tensor of shape [..., H] or [T, H].
+             dim:
+                 Dimension along which to normalize (default -1, the last dimension).
+             eps:
+                 Small value to avoid division by zero (default 1e-12).
+
+         returns:
+             L2-normalized tensor of the same shape as 'x'.
+         """
+         if not torch.is_floating_point(x):
+             return x
+
+         norm = torch.linalg.vector_norm(x, ord=2, dim=dim, keepdim=True)
+
+         mask = norm > eps
+
+         safe_norm = torch.where(mask, norm, torch.ones_like(norm))
+         y = x / safe_norm
+         y = torch.where(mask, y, torch.zeros_like(y))
+
+         return y
+
+     def _ensure_eval_mode(self) -> None:
+         try:
+             self.model.hf_model.eval()
+         except Exception:
+             pass
+
+ def _resp_text(resp_obj: object) -> str:
+     for attr in ("model_response", "text"):
+         if hasattr(resp_obj, attr) and isinstance(getattr(resp_obj, attr), str):
+             return getattr(resp_obj, attr)
+     return str(resp_obj)
+
+ if __name__ == "__main__":
+     from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
+
+     model = WisentModel(model_name="/home/gg/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B-Instruct/snapshots/9213176726f574b556790deb65791e0c5aa438b6")
+     collector = ActivationCollector(model=model, store_device="cpu")
+
+     pair = ContrastivePair(
+         prompt="The capital of France is",
+         positive_response=PositiveResponse(" Paris."),
+         negative_response=NegativeResponse(" London."),
+     )
+
+     updated = collector.collect_for_pair(
+         pair,
+         layers=["1", "3"],
+         aggregation=ActivationAggregationStrategy.CONTINUATION_TOKEN,
+         return_full_sequence=False,
+     )
+
+     print(updated)
+
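As a sanity check on the shape walkthrough in the docstring above, the aggregation rules can be reproduced with plain tensors. The following is a minimal sketch using the toy dimensions from the docstring (T_full = 22, H = 256, prompt_len = 14); it is illustrative only and not part of the package:

import torch

# Toy dimensions from the docstring walkthrough above.
T_full, H, prompt_len = 22, 256, 14
layer_seq = torch.randn(T_full, H)    # one layer's hidden states, [T, H]

cont = layer_seq[prompt_len:]         # continuation slice, shape (8, 256)

continuation_token = cont[0]          # CONTINUATION_TOKEN -> (256,)
first_token = layer_seq[0]            # FIRST_TOKEN -> (256,)
last_token = layer_seq[-1]            # LAST_TOKEN -> (256,)
mean_pooled = cont.mean(dim=0)        # MEAN_POOLING over the continuation -> (256,)
max_pooled = cont.max(dim=0).values   # MAX_POOLING over the continuation -> (256,)

assert cont.shape == (T_full - prompt_len, H)
assert mean_pooled.shape == (H,)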
File without changes: wisent/core/activations/core/__init__.py
wisent/core/activations/core/atoms.py
@@ -0,0 +1,216 @@
+ from __future__ import annotations
+
+ from enum import Enum, auto, unique
+ from typing import Mapping, Iterator, TypeAlias
+ import numpy as np
+ import torch
+ import sys
+
+ # Python 3.10 compatibility
+ if sys.version_info >= (3, 11):
+     from enum import StrEnum
+ else:
+     class StrEnum(str, Enum):
+         """StrEnum backport for Python < 3.11"""
+         def _generate_next_value_(name, start, count, last_values):
+             return name.lower()
+
+         def __str__(self) -> str:
+             return str(self.value)
+
+ __all__ = ["LayerActivations", "ActivationAggregationStrategy", "LayerName", "LayerActivation", "ActivationMap", "RawActivationMap"]
+
+ LayerName: TypeAlias = str
+ LayerActivation: TypeAlias = torch.Tensor | None
+ ActivationMap: TypeAlias = Mapping[LayerName, LayerActivation]
+ RawActivationMap: TypeAlias = Mapping[LayerName, torch.Tensor | np.ndarray | None]
+
+ class _LowerSnakeStrEnum(StrEnum):
+     """StrEnum whose auto() values are the lower_snake_case of the member name."""
+     def _generate_next_value_(name, start, count, last_values):  # type: ignore
+         return name.lower()
+
+ @unique
+ class ActivationAggregationStrategy(_LowerSnakeStrEnum):
+     """Strategies for selecting/aggregating tokens in activation extraction."""
+
+     CHOICE_TOKEN = auto()         # target A/B choice tokens (multiple choice)
+     CONTINUATION_TOKEN = auto()   # first token of the continuation
+     LAST_TOKEN = auto()           # always use the last token
+     FIRST_TOKEN = auto()          # always use the first token
+     MEAN_POOLING = auto()         # mean over all tokens
+     MAX_POOLING = auto()          # max over all tokens
+
+     @property
+     def description(self) -> str:
+         return {
+             ActivationAggregationStrategy.CHOICE_TOKEN: "Target A/B choice tokens (multiple choice).",
+             ActivationAggregationStrategy.CONTINUATION_TOKEN: "Use the first token of the continuation.",
+             ActivationAggregationStrategy.LAST_TOKEN: "Always select the last token.",
+             ActivationAggregationStrategy.FIRST_TOKEN: "Always select the first token.",
+             ActivationAggregationStrategy.MEAN_POOLING: "Aggregate by mean over all tokens.",
+             ActivationAggregationStrategy.MAX_POOLING: "Aggregate by max over all tokens.",
+         }[self]
+
+
+ class LayerActivations(Mapping[LayerName, LayerActivation]):
+     """Immutable mapping of layer names to activations.
+
+     Behaves like 'Mapping[str, torch.Tensor | None]'.
+
+     construction:
+         'LayerActivations(data, activation_aggregation_strategy=None, dtype=None)'
+
+         - 'torch.Tensor' values are kept as-is (or cast to 'dtype' if given).
+         - 'np.ndarray' values are converted via 'torch.from_numpy' (then cast if needed).
+         - 'None' values are preserved.
+         - Missing/empty input yields an empty container.
+
+     attributes:
+         _data:
+             Internal storage dict mapping layer names to their activations.
+         _strategy:
+             'ActivationAggregationStrategy' (see above). Indicates how activations were aggregated, if applicable.
+
+     methods:
+         'summary()':
+             dict with per-layer shape/dtype/device/requires_grad.
+         'to(*args, **kwargs)':
+             apply 'Tensor.to' to all non-'None' values.
+         'cpu()', 'detach()':
+             convenience operations.
+         'numpy()':
+             map tensors to CPU NumPy arrays (others to 'None').
+         'to_dict()':
+             plain dict (useful for (de)serialization).
+
+     examples:
+         >>> acts = LayerActivations({"layer1": torch.randn(2, 10, 768), "layer2": None}, activation_aggregation_strategy="mean_pooling")
+         >>> acts["layer1"].shape
+         torch.Size([2, 10, 768])
+         >>> acts["layer2"] is None
+         True
+         >>> acts.activation_aggregation_strategy
+         <ActivationAggregationStrategy.MEAN_POOLING: 'mean_pooling'>
+         >>> acts.summary()
+         {'layer1': {'shape': (2, 10, 768), 'dtype': 'torch.float32', 'device': 'cpu', 'requires_grad': False}, 'layer2': {'shape': None, 'dtype': None, 'device': None, 'requires_grad': None}, '_activation_aggregation_strategy': {'strategy': 'mean_pooling'}}
+         >>> acts.numpy()
+         {'layer1': array(...), 'layer2': None}
+         >>> acts.to("cuda")
+         LayerActivations(
+           layer1: Tensor(shape=(2, 10, 768), dtype=torch.float32, device=cuda:0)
+           layer2: None
+           _activation_aggregation_strategy: mean_pooling
+         )
+         >>> acts.detach()  # if any tensor required grad
+         LayerActivations(
+           layer1: Tensor(shape=(2, 10, 768), dtype=torch.float32, device=cpu)
+           layer2: None
+           _activation_aggregation_strategy: mean_pooling
+         )
+
+     notes:
+         - Use 'summary()' or 'numpy()' if you need JSON-serializable content.
+         - Keys are strings by convention; enforced by type hints.
+     """
+     __slots__ = ("_data", "_strategy")
+
+     def __init__(self, data: RawActivationMap | None = None, activation_aggregation_strategy: ActivationAggregationStrategy | str | None = None, dtype: torch.dtype | None = None):
+         store: dict[LayerName, LayerActivation] = {}
+         if data:
+             for layer, val in data.items():
+                 if val is None:
+                     store[layer] = None
+                 elif isinstance(val, torch.Tensor):
+                     store[layer] = val if dtype is None else val.to(dtype)
+                 elif isinstance(val, np.ndarray):
+                     t = torch.from_numpy(val)
+                     store[layer] = t if dtype is None else t.to(dtype)
+                 else:
+                     raise TypeError(
+                         f"Activations for layer '{layer}' must be torch.Tensor, np.ndarray, or None."
+                     )
+         self._data = store
+         self._strategy = self._normalize_strategy(activation_aggregation_strategy)
+
+     @staticmethod
+     def _normalize_strategy(
+         s: ActivationAggregationStrategy | str | None
+     ) -> ActivationAggregationStrategy | None:
+         if s is None:
+             return None
+         if isinstance(s, ActivationAggregationStrategy):
+             return s
+         if isinstance(s, str):
+             try:
+                 return ActivationAggregationStrategy(s)
+             except ValueError:
+                 valid = ", ".join(e.value for e in ActivationAggregationStrategy)
+                 raise ValueError(
+                     f"Unknown activation_aggregation_strategy='{s}'. "
+                     f"Valid options: {valid}"
+                 )
+         raise TypeError(
+             "activation_aggregation_strategy must be ActivationAggregationStrategy | str | None"
+         )
+
+     @property
+     def activation_aggregation_strategy(self) -> ActivationAggregationStrategy | None:
+         return self._strategy
+
+     def __getitem__(self, key: LayerName) -> LayerActivation:
+         return self._data[key]
+
+     def __iter__(self) -> Iterator[LayerName]:
+         return iter(self._data)
+
+     def __len__(self) -> int:
+         return len(self._data)
+
+     def summary(self) -> dict[LayerName, dict[str, tuple | str | bool | None]]:
+         """Return a summary of the activations. For each layer, provides
+         shape, dtype, device, requires_grad status, and the aggregation strategy.
+         """
+         out: dict[LayerName, dict[str, tuple | str | bool | None]] = {}
+         for k, v in self._data.items():
+             if isinstance(v, torch.Tensor):
+                 out[k] = {
+                     "shape": tuple(v.shape),
+                     "dtype": str(v.dtype),
+                     "device": str(v.device),
+                     "requires_grad": bool(v.requires_grad),
+                 }
+             else:
+                 out[k] = {"shape": None, "dtype": None, "device": None, "requires_grad": None}
+
+         out["_activation_aggregation_strategy"] = {"strategy": self._strategy.value if self._strategy else None}
+         return out
+
+     def numpy(self) -> dict[LayerName, np.ndarray | None]:
+         return {k: (v.detach().cpu().numpy() if isinstance(v, torch.Tensor) else None)
+                 for k, v in self._data.items()}
+
+     def to_dict(self) -> dict[LayerName, LayerActivation]:
+         return dict(self._data)
+
+     def to(self, *args, **kwargs) -> LayerActivations:
+         return LayerActivations({k: (v.to(*args, **kwargs) if isinstance(v, torch.Tensor) else None)
+                                  for k, v in self._data.items()},
+                                 activation_aggregation_strategy=self._strategy)
+
+     def detach(self) -> LayerActivations:
+         return LayerActivations({k: (v.detach() if isinstance(v, torch.Tensor) else None)
+                                  for k, v in self._data.items()},
+                                 activation_aggregation_strategy=self._strategy)
+
+     def cpu(self) -> LayerActivations:
+         return self.to("cpu")
+
+     def __repr__(self) -> str:
+         lines = ["LayerActivations("]
+         for k, v in self._data.items():
+             if isinstance(v, torch.Tensor):
+                 lines.append(
+                     f"  {k}: Tensor(shape={tuple(v.shape)}, dtype={v.dtype}, device={v.device})"
+                 )
+             else:
+                 lines.append(f"  {k}: None")
+         lines.append(f"  _activation_aggregation_strategy: {self._strategy.value if self._strategy else None}")
+         lines.append(")")
+         return "\n".join(lines)
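A short usage sketch for the container above (assuming the wisent 0.5.2 wheel is installed so the import path resolves); illustrative only:

import numpy as np
import torch

from wisent.core.activations.core.atoms import LayerActivations

# Tensor, NumPy, and None values are all accepted; the strategy may be a string.
acts = LayerActivations(
    {"1": torch.randn(256), "2": np.zeros((256,), dtype=np.float32), "3": None},
    activation_aggregation_strategy="mean_pooling",
    dtype=torch.float32,
)

assert len(acts) == 3                         # Mapping protocol
assert acts["2"].dtype == torch.float32      # ndarray converted via torch.from_numpy
print(acts.activation_aggregation_strategy)  # ActivationAggregationStrategy.MEAN_POOLING
print(acts.summary()["3"])                   # {'shape': None, 'dtype': None, 'device': None, 'requires_grad': None}
arrays = acts.numpy()                        # JSON-friendly: ndarrays for tensors, None preserved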
wisent/core/agent/__init__.py
@@ -0,0 +1,18 @@
+ """
+ Agent module for wisent-guard autonomous systems.
+
+ This module provides:
+ - ResponseDiagnostics: Response analysis and quality assessment
+ - ResponseSteering: Response improvement and steering
+ - Data classes for analysis and improvement results
+ """
+
+ from .diagnose import ResponseDiagnostics, AnalysisResult
+ from .steer import ResponseSteering, ImprovementResult
+
+ __all__ = [
+     'ResponseDiagnostics',
+     'AnalysisResult',
+     'ResponseSteering',
+     'ImprovementResult',
+ ]
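Given these re-exports, downstream code can import the agent API from the package directly; a one-line sketch (assuming the wheel is installed):

from wisent.core.agent import ResponseDiagnostics, AnalysisResult, ResponseSteering, ImprovementResult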