PyPI - wisent - Versions diffs - 0.1.1__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

wisent 0.1.1 (py3-none-any.whl) → 0.5.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wisent might be problematic. Click here for more details.

Files changed (237) hide show

wisent/__init__.py +1 -8
wisent/benchmarks/__init__.py +0 -0
wisent/benchmarks/coding/__init__.py +0 -0
wisent/benchmarks/coding/metrics/__init__.py +0 -0
wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
wisent/benchmarks/coding/metrics/evaluator.py +275 -0
wisent/benchmarks/coding/metrics/passk.py +66 -0
wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
wisent/benchmarks/coding/providers/__init__.py +18 -0
wisent/benchmarks/coding/providers/core/__init__.py +0 -0
wisent/benchmarks/coding/providers/core/atoms.py +31 -0
wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
wisent/classifiers/__init__.py +0 -0
wisent/classifiers/core/__init__.py +0 -0
wisent/classifiers/core/atoms.py +747 -0
wisent/classifiers/models/__init__.py +0 -0
wisent/classifiers/models/logistic.py +29 -0
wisent/classifiers/models/mlp.py +47 -0
wisent/cli/__init__.py +0 -0
wisent/cli/classifiers/__init__.py +0 -0
wisent/cli/classifiers/classifier_rotator.py +137 -0
wisent/cli/cli_logger.py +142 -0
wisent/cli/data_loaders/__init__.py +0 -0
wisent/cli/data_loaders/data_loader_rotator.py +96 -0
wisent/cli/evaluators/__init__.py +0 -0
wisent/cli/evaluators/evaluator_rotator.py +148 -0
wisent/cli/steering_methods/__init__.py +0 -0
wisent/cli/steering_methods/steering_rotator.py +110 -0
wisent/cli/wisent_cli/__init__.py +0 -0
wisent/cli/wisent_cli/commands/__init__.py +0 -0
wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
wisent/cli/wisent_cli/commands/listing.py +154 -0
wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
wisent/cli/wisent_cli/main.py +93 -0
wisent/cli/wisent_cli/shell.py +80 -0
wisent/cli/wisent_cli/ui.py +69 -0
wisent/cli/wisent_cli/util/__init__.py +0 -0
wisent/cli/wisent_cli/util/aggregations.py +43 -0
wisent/cli/wisent_cli/util/parsing.py +126 -0
wisent/cli/wisent_cli/version.py +4 -0
wisent/core/__init__.py +27 -0
wisent/core/activations/__init__.py +0 -0
wisent/core/activations/activations_collector.py +338 -0
wisent/core/activations/core/__init__.py +0 -0
wisent/core/activations/core/atoms.py +216 -0
wisent/core/agent/__init__.py +18 -0
wisent/core/agent/budget.py +638 -0
wisent/core/agent/device_benchmarks.py +685 -0
wisent/core/agent/diagnose/__init__.py +55 -0
wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
wisent/core/agent/diagnose/create_classifier.py +1154 -0
wisent/core/agent/diagnose/response_diagnostics.py +268 -0
wisent/core/agent/diagnose/select_classifiers.py +506 -0
wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
wisent/core/agent/diagnose/tasks/__init__.py +33 -0
wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
wisent/core/agent/diagnose.py +242 -0
wisent/core/agent/steer.py +212 -0
wisent/core/agent/timeout.py +134 -0
wisent/core/autonomous_agent.py +1234 -0
wisent/core/bigcode_integration.py +583 -0
wisent/core/contrastive_pairs/__init__.py +15 -0
wisent/core/contrastive_pairs/core/__init__.py +0 -0
wisent/core/contrastive_pairs/core/atoms.py +45 -0
wisent/core/contrastive_pairs/core/buliders.py +59 -0
wisent/core/contrastive_pairs/core/pair.py +178 -0
wisent/core/contrastive_pairs/core/response.py +152 -0
wisent/core/contrastive_pairs/core/serialization.py +300 -0
wisent/core/contrastive_pairs/core/set.py +133 -0
wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
wisent/core/data_loaders/__init__.py +0 -0
wisent/core/data_loaders/core/__init__.py +0 -0
wisent/core/data_loaders/core/atoms.py +98 -0
wisent/core/data_loaders/loaders/__init__.py +0 -0
wisent/core/data_loaders/loaders/custom.py +120 -0
wisent/core/data_loaders/loaders/lm_loader.py +218 -0
wisent/core/detection_handling.py +257 -0
wisent/core/download_full_benchmarks.py +1386 -0
wisent/core/evaluators/__init__.py +0 -0
wisent/core/evaluators/oracles/__init__.py +0 -0
wisent/core/evaluators/oracles/interactive.py +73 -0
wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
wisent/core/evaluators/oracles/user_specified.py +67 -0
wisent/core/hyperparameter_optimizer.py +429 -0
wisent/core/lm_eval_harness_ground_truth.py +1396 -0
wisent/core/log_likelihoods_evaluator.py +321 -0
wisent/core/managed_cached_benchmarks.py +595 -0
wisent/core/mixed_benchmark_sampler.py +364 -0
wisent/core/model_config_manager.py +330 -0
wisent/core/model_persistence.py +317 -0
wisent/core/models/__init__.py +0 -0
wisent/core/models/core/__init__.py +0 -0
wisent/core/models/core/atoms.py +460 -0
wisent/core/models/wisent_model.py +727 -0
wisent/core/multi_steering.py +316 -0
wisent/core/optuna/__init__.py +57 -0
wisent/core/optuna/classifier/__init__.py +25 -0
wisent/core/optuna/classifier/activation_generator.py +349 -0
wisent/core/optuna/classifier/classifier_cache.py +509 -0
wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
wisent/core/optuna/steering/__init__.py +0 -0
wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
wisent/core/optuna/steering/data_utils.py +342 -0
wisent/core/optuna/steering/metrics.py +474 -0
wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
wisent/core/optuna/steering/steering_optimization.py +1111 -0
wisent/core/parser.py +1668 -0
wisent/core/prompts/__init__.py +0 -0
wisent/core/prompts/core/__init__.py +0 -0
wisent/core/prompts/core/atom.py +57 -0
wisent/core/prompts/core/prompt_formater.py +157 -0
wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
wisent/core/representation.py +5 -0
wisent/core/sample_size_optimizer.py +648 -0
wisent/core/sample_size_optimizer_v2.py +355 -0
wisent/core/save_results.py +277 -0
wisent/core/steering.py +652 -0
wisent/core/steering_method.py +26 -0
wisent/core/steering_methods/__init__.py +0 -0
wisent/core/steering_methods/core/__init__.py +0 -0
wisent/core/steering_methods/core/atoms.py +153 -0
wisent/core/steering_methods/methods/__init__.py +0 -0
wisent/core/steering_methods/methods/caa.py +44 -0
wisent/core/steering_optimizer.py +1297 -0
wisent/core/task_interface.py +132 -0
wisent/core/task_selector.py +189 -0
wisent/core/tasks/__init__.py +175 -0
wisent/core/tasks/aime_task.py +141 -0
wisent/core/tasks/file_task.py +211 -0
wisent/core/tasks/hle_task.py +180 -0
wisent/core/tasks/hmmt_task.py +119 -0
wisent/core/tasks/livecodebench_task.py +201 -0
wisent/core/tasks/livemathbench_task.py +158 -0
wisent/core/tasks/lm_eval_task.py +455 -0
wisent/core/tasks/math500_task.py +84 -0
wisent/core/tasks/polymath_task.py +146 -0
wisent/core/tasks/supergpqa_task.py +220 -0
wisent/core/time_estimator.py +149 -0
wisent/core/timing_calibration.py +174 -0
wisent/core/tracking/__init__.py +54 -0
wisent/core/tracking/latency.py +618 -0
wisent/core/tracking/memory.py +359 -0
wisent/core/trainers/__init__.py +0 -0
wisent/core/trainers/core/__init__.py +11 -0
wisent/core/trainers/core/atoms.py +45 -0
wisent/core/trainers/steering_trainer.py +271 -0
wisent/core/user_model_config.py +158 -0
wisent/opti/__init__.py +0 -0
wisent/opti/core/__init__.py +0 -0
wisent/opti/core/atoms.py +175 -0
wisent/opti/methods/__init__.py +0 -0
wisent/opti/methods/opti_classificator.py +172 -0
wisent/opti/methods/opti_steering.py +138 -0
wisent/synthetic/__init__.py +0 -0
wisent/synthetic/cleaners/__init__.py +0 -0
wisent/synthetic/cleaners/core/__init__.py +0 -0
wisent/synthetic/cleaners/core/atoms.py +58 -0
wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
wisent/synthetic/cleaners/methods/__init__.py +0 -0
wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
wisent/synthetic/db_instructions/__init__.py +0 -0
wisent/synthetic/db_instructions/core/__init__.py +0 -0
wisent/synthetic/db_instructions/core/atoms.py +25 -0
wisent/synthetic/db_instructions/mini_dp.py +37 -0
wisent/synthetic/generators/__init__.py +0 -0
wisent/synthetic/generators/core/__init__.py +0 -0
wisent/synthetic/generators/core/atoms.py +73 -0
wisent/synthetic/generators/diversities/__init__.py +0 -0
wisent/synthetic/generators/diversities/core/__init__.py +0 -0
wisent/synthetic/generators/diversities/core/core.py +68 -0
wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
wisent/synthetic/generators/pairs_generator.py +179 -0
wisent-0.5.1.dist-info/METADATA +67 -0
wisent-0.5.1.dist-info/RECORD +218 -0
{wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/WHEEL +1 -1
{wisent-0.1.1.dist-info → wisent-0.5.1.dist-info/licenses}/LICENSE +2 -2
wisent/activations/__init__.py +0 -9
wisent/activations/client.py +0 -97
wisent/activations/extractor.py +0 -251
wisent/activations/models.py +0 -95
wisent/client.py +0 -45
wisent/control_vector/__init__.py +0 -9
wisent/control_vector/client.py +0 -85
wisent/control_vector/manager.py +0 -168
wisent/control_vector/models.py +0 -70
wisent/inference/__init__.py +0 -9
wisent/inference/client.py +0 -103
wisent/inference/inferencer.py +0 -250
wisent/inference/models.py +0 -66
wisent/utils/__init__.py +0 -3
wisent/utils/auth.py +0 -30
wisent/utils/http.py +0 -228
wisent/version.py +0 -3
wisent-0.1.1.dist-info/METADATA +0 -142
wisent-0.1.1.dist-info/RECORD +0 -23
{wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/top_level.txt +0 -0

wisent/core/sample_size_optimizer.py ADDED Viewed

@@ -0,0 +1,648 @@
+"""
+Sample Size Optimizer for finding the optimal training sample size for classifiers.
+"""
+import json
+import logging
+import os
+import time
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
+from wisent_guard.core.classifier.classifier import Classifier
+from .activations import ActivationAggregationStrategy
+from .contrastive_pairs import ContrastivePairSet
+from .model import Model
+from .model_config_manager import ModelConfigManager
+logger = logging.getLogger(__name__)
+class SampleSizeOptimizer:
+    """Optimizes training sample size for classifiers."""
+    def __init__(
+        self,
+        model_name: str,
+        task_name: str = "truthfulqa_mc1",
+        layer: int = 0,
+        token_aggregation: str = "average",
+        threshold: float = 0.5,
+        test_split: float = 0.2,
+        sample_sizes: Optional[List[int]] = None,
+        device: Optional[str] = None,
+        verbose: bool = False,
+    ):
+        """
+        Initialize the sample size optimizer.
+        Args:
+            model_name: Name of the model to optimize
+            task_name: Task to optimize for
+            layer: Layer index to optimize
+            token_aggregation: Token aggregation method (average, final, first, max, min)
+            threshold: Detection threshold for classification
+            test_split: Fraction of data to use for testing
+            sample_sizes: List of sample sizes to test
+            device: Device to use for computation
+            verbose: Enable verbose output
+        """
+        self.model_name = model_name
+        self.task_name = task_name
+        self.layer = layer
+        self.token_aggregation = token_aggregation
+        self.threshold = threshold
+        self.test_split = test_split
+        self.verbose = verbose
+        # Default sample sizes if not provided
+        if sample_sizes is None:
+            self.sample_sizes = [1, 2, 5, 10, 20, 50, 100, 200, 500]
+        else:
+            self.sample_sizes = sorted(sample_sizes)
+        # Initialize model
+        self.model = Model(name=model_name, device=device)
+        self.device = self.model.device
+        # Storage for results
+        self.results = []
+        self.optimal_sample_size = None
+        logger.info(f"Initialized SampleSizeOptimizer for {model_name}")
+        logger.info(f"Task: {task_name}, Layer: {layer}")
+        logger.info(f"Sample sizes to test: {self.sample_sizes}")
+    def load_and_split_data(self, limit: Optional[int] = None) -> Tuple[ContrastivePairSet, ContrastivePairSet]:
+        """
+        Load task data and split into train/test sets.
+        Args:
+            limit: Maximum number of samples to load (None for all)
+        Returns:
+            Tuple of (train_pairs, test_pairs)
+        """
+        logger.info(f"Loading data for task: {self.task_name}")
+        # Load task data using the model
+        max_samples = limit or 1000  # Default to 1000 if not specified
+        # Try to use cached benchmark data first
+        qa_pairs = None
+        try:
+            from .managed_cached_benchmarks import get_managed_cache
+            cache = get_managed_cache()
+            logger.info(f"Attempting to load from cache with limit={max_samples}")
+            # Load samples from cache (it will download if needed)
+            samples = cache.get_task_samples(self.task_name, limit=max_samples)
+            if samples:
+                logger.info(f"Loaded {len(samples)} samples from cache")
+                # Convert cached samples to QA pairs format
+                qa_pairs = []
+                for sample in samples:
+                    # The cached sample has 'normalized' field with the QA pair
+                    normalized = sample.get("normalized", {})
+                    # Handle both formats: good_response/bad_response and correct_answer
+                    if "good_response" in normalized and "bad_response" in normalized:
+                        qa_pair = {
+                            "question": normalized.get("context", normalized.get("question", "")),
+                            "correct_answer": normalized.get("good_response", ""),
+                            "incorrect_answer": normalized.get("bad_response", ""),
+                            "metadata": normalized.get("metadata", {}),
+                        }
+                    else:
+                        # For truthfulqa_mc1, we need to get incorrect answers from mc1_targets
+                        raw_data = sample.get("raw_data", {})
+                        mc1_targets = raw_data.get("mc1_targets", {})
+                        choices = mc1_targets.get("choices", [])
+                        labels = mc1_targets.get("labels", [])
+                        # Find first incorrect answer
+                        incorrect_answer = None
+                        for i, label in enumerate(labels):
+                            if label == 0 and i < len(choices):
+                                incorrect_answer = choices[i]
+                                break
+                        if not incorrect_answer:
+                            incorrect_answer = "This is incorrect"
+                        qa_pair = {
+                            "question": normalized.get("question", ""),
+                            "correct_answer": normalized.get("correct_answer", ""),
+                            "incorrect_answer": incorrect_answer,
+                            "metadata": normalized.get("metadata", {}),
+                        }
+                    qa_pairs.append(qa_pair)
+                logger.info(f"Converted {len(qa_pairs)} cached samples to QA pairs")
+        except Exception as e:
+            logger.warning(f"Failed to load from cache: {e}")
+            qa_pairs = None
+        # Fallback to loading from lm-eval if cache failed
+        if not qa_pairs:
+            logger.info("Loading from lm-eval harness...")
+            # Load lm-eval task
+            task_data = self.model.load_lm_eval_task(self.task_name, shots=0, limit=max_samples)
+            # Split into train/test docs
+            docs, _ = self.model.split_task_data(task_data, split_ratio=1.0)  # Use all for now
+            if not docs:
+                raise ValueError(f"No documents loaded for task {self.task_name}")
+            logger.info(f"Loaded {len(docs)} documents from {self.task_name}")
+            # Extract QA pairs from task docs
+            qa_pairs = ContrastivePairSet.extract_qa_pairs_from_task_docs(self.task_name, task_data, docs)
+        if not qa_pairs:
+            raise ValueError(f"No QA pairs could be extracted from task {self.task_name}")
+        logger.info(f"Extracted {len(qa_pairs)} QA pairs")
+        # Create contrastive pairs from QA pairs
+        from wisent_guard.core.activations.activation_collection_method import ActivationCollectionLogic
+        collector = ActivationCollectionLogic(model=self.model)
+        # Import token aggregation function
+        # Create contrastive pairs
+        all_pairs = []
+        for qa_pair in qa_pairs:
+            # Create prompts for positive and negative cases
+            question = qa_pair["question"]
+            correct_answer = qa_pair["correct_answer"]
+            incorrect_answer = qa_pair["incorrect_answer"]
+            # Generate with model to get activations
+            # Positive case (correct answer)
+            pos_prompt = self.model.format_prompt(question)
+            pos_response = correct_answer
+            # Negative case (incorrect answer)
+            neg_prompt = self.model.format_prompt(question)
+            neg_response = incorrect_answer
+            # Create contrastive pair
+            from .contrastive_pairs import ContrastivePair
+            from .response import NegativeResponse, PositiveResponse
+            pair = ContrastivePair(
+                prompt=question,
+                positive_response=PositiveResponse(text=pos_response),
+                negative_response=NegativeResponse(text=neg_response),
+            )
+            all_pairs.append(pair)
+        if not all_pairs:
+            raise ValueError(f"No contrastive pairs created for task {self.task_name}")
+        # Extract activations for all pairs at the specified layer
+        logger.info(f"Extracting activations at layer {self.layer}")
+        # Use the collector to extract activations
+        # For MULTIPLE_CHOICE, we use CHOICE_TOKEN targeting
+        all_pairs = collector.collect_activations_batch(
+            all_pairs,
+            layer_index=self.layer,
+            device=self.device,
+            token_targeting_strategy=ActivationAggregationStrategy.CHOICE_TOKEN,
+        )
+        # Filter out any pairs without activations
+        all_pairs = [p for p in all_pairs if p.positive_activations is not None and p.negative_activations is not None]
+        logger.info(f"Loaded {len(all_pairs)} contrastive pairs")
+        # Calculate split index
+        n_test = int(len(all_pairs) * self.test_split)
+        n_train = len(all_pairs) - n_test
+        # Create train and test sets
+        # Use a fixed seed for reproducibility
+        np.random.seed(42)
+        indices = np.random.permutation(len(all_pairs))
+        train_indices = indices[:n_train]
+        test_indices = indices[n_train:]
+        train_pairs = [all_pairs[i] for i in train_indices]
+        test_pairs = [all_pairs[i] for i in test_indices]
+        # Create ContrastivePairSet objects
+        train_set = ContrastivePairSet(name=f"{self.task_name}_train", pairs=train_pairs)
+        test_set = ContrastivePairSet(name=f"{self.task_name}_test", pairs=test_pairs)
+        logger.info(f"Split data: {len(train_pairs)} train, {len(test_pairs)} test")
+        return train_set, test_set
+    def _aggregate_activations(self, activations):
+        """
+        Apply token aggregation to activations based on configured method.
+        Since we're using CHOICE_TOKEN strategy, activations should be a single vector.
+        This method is here for consistency with the main CLI approach.
+        Args:
+            activations: Activation vector or tensor
+        Returns:
+            Aggregated activation vector
+        """
+        # For CHOICE_TOKEN strategy, activations are already a single vector
+        # No aggregation needed
+        return activations
+    def train_classifier_with_sample_size(
+        self, train_set: ContrastivePairSet, sample_size: int
+    ) -> Tuple[Classifier, float]:
+        """
+        Train a classifier with a specific sample size.
+        Args:
+            train_set: Full training set
+            sample_size: Number of samples to use for training
+        Returns:
+            Tuple of (trained_classifier, training_time)
+        """
+        # Limit training set to sample_size
+        if sample_size >= len(train_set.pairs):
+            train_pairs = train_set.pairs
+        else:
+            # Use first sample_size pairs (already shuffled)
+            train_pairs = train_set.pairs[:sample_size]
+        logger.info(f"Training classifier with {len(train_pairs)} samples")
+        # Ensure we have enough samples for training
+        if len(train_pairs) < 2:
+            logger.warning(f"Not enough training samples ({len(train_pairs)}). Skipping.")
+            return None, 0.0
+        # Extract activations
+        X_train = []
+        y_train = []
+        for pair in train_pairs:
+            # Positive example (correct answer)
+            X_train.append(pair.positive_activations)
+            y_train.append(0)  # 0 for correct/truthful
+            # Negative example (incorrect answer)
+            X_train.append(pair.negative_activations)
+            y_train.append(1)  # 1 for incorrect/untruthful
+        # Create and train classifier
+        classifier = Classifier(model_type="logistic", device=self.device)
+        start_time = time.time()
+        classifier.fit(X_train, y_train)
+        training_time = time.time() - start_time
+        return classifier, training_time
+    def evaluate_classifier(self, classifier: Classifier, test_set: ContrastivePairSet) -> Dict[str, float]:
+        """
+        Evaluate a classifier on the test set.
+        Args:
+            classifier: Trained classifier
+            test_set: Test set to evaluate on
+        Returns:
+            Dictionary of metrics
+        """
+        X_test = []
+        y_test = []
+        for pair in test_set.pairs:
+            # Positive example
+            X_test.append(pair.positive_activations)
+            y_test.append(0)
+            # Negative example
+            X_test.append(pair.negative_activations)
+            y_test.append(1)
+        # Get predictions
+        y_pred = []
+        for x in X_test:
+            pred = classifier.predict(x)
+            y_pred.append(1 if pred > 0.5 else 0)
+        # Calculate metrics
+        metrics = {
+            "accuracy": accuracy_score(y_test, y_pred),
+            "precision": precision_score(y_test, y_pred, zero_division=0),
+            "recall": recall_score(y_test, y_pred, zero_division=0),
+            "f1": f1_score(y_test, y_pred, zero_division=0),
+        }
+        return metrics
+    def find_optimal_sample_size(self) -> int:
+        """
+        Determine the optimal sample size based on diminishing returns.
+        Returns:
+            Optimal sample size
+        """
+        if len(self.results) < 2:
+            return self.sample_sizes[-1]
+        # Extract accuracies and times
+        accuracies = [r["metrics"]["accuracy"] for r in self.results]
+        times = [r["training_time"] for r in self.results]
+        sizes = [r["sample_size"] for r in self.results]
+        # Calculate accuracy gains
+        gains = []
+        for i in range(1, len(accuracies)):
+            gain = accuracies[i] - accuracies[i - 1]
+            gains.append(gain)
+        # Find where gain drops below threshold (2% improvement)
+        threshold = 0.02
+        optimal_idx = len(sizes) - 1  # Default to largest
+        for i, gain in enumerate(gains):
+            if gain < threshold and accuracies[i + 1] > 0.7:  # Ensure reasonable accuracy
+                optimal_idx = i + 1
+                break
+        # Also consider training time - if time increases dramatically, prefer smaller
+        if optimal_idx < len(sizes) - 1 and times[optimal_idx] > 0:
+            time_ratio = times[optimal_idx + 1] / times[optimal_idx]
+            if time_ratio > 2.0 and gains[optimal_idx] < 0.01:
+                # Training time doubled for < 1% gain, stick with current
+                pass
+            elif accuracies[optimal_idx + 1] - accuracies[optimal_idx] > 0.05:
+                # Significant accuracy improvement, use larger size
+                optimal_idx += 1
+        return sizes[optimal_idx]
+    def run_optimization(self) -> Dict[str, Any]:
+        """
+        Run the complete sample size optimization process.
+        Returns:
+            Dictionary containing results and optimal sample size
+        """
+        logger.info("Starting sample size optimization...")
+        # Load and split data
+        dataset_limit = getattr(self, "dataset_limit", None)
+        train_set, test_set = self.load_and_split_data(limit=dataset_limit)
+        # Ensure we don't test sample sizes larger than training set
+        max_train_size = len(train_set.pairs)
+        valid_sample_sizes = [s for s in self.sample_sizes if s <= max_train_size]
+        if not valid_sample_sizes:
+            raise ValueError(f"No valid sample sizes. Training set has only {max_train_size} samples.")
+        logger.info(f"Testing sample sizes: {valid_sample_sizes}")
+        # Test each sample size
+        for sample_size in valid_sample_sizes:
+            logger.info(f"\n{'=' * 50}")
+            logger.info(f"Testing sample size: {sample_size}")
+            # Train classifier
+            classifier, training_time = self.train_classifier_with_sample_size(train_set, sample_size)
+            # Skip if classifier training failed
+            if classifier is None:
+                logger.warning(f"Skipping sample size {sample_size} - not enough samples for training")
+                continue
+            # Evaluate on test set
+            metrics = self.evaluate_classifier(classifier, test_set)
+            # Store results
+            result = {"sample_size": sample_size, "training_time": training_time, "metrics": metrics}
+            self.results.append(result)
+            logger.info(f"Accuracy: {metrics['accuracy']:.3f}")
+            logger.info(f"F1 Score: {metrics['f1']:.3f}")
+            logger.info(f"Training time: {training_time:.3f}s")
+        # Find optimal sample size
+        self.optimal_sample_size = self.find_optimal_sample_size()
+        logger.info(f"\n{'=' * 50}")
+        logger.info(f"Optimal sample size: {self.optimal_sample_size}")
+        # Create summary
+        summary = {
+            "model": self.model_name,
+            "task": self.task_name,
+            "layer": self.layer,
+            "test_split": self.test_split,
+            "results": self.results,
+            "optimal_sample_size": self.optimal_sample_size,
+            "timestamp": datetime.now().isoformat(),
+        }
+        return summary
+    def save_results(self, output_dir: Optional[str] = None) -> str:
+        """
+        Save optimization results to file.
+        Args:
+            output_dir: Directory to save results (uses default if None)
+        Returns:
+            Path to saved results file
+        """
+        if output_dir is None:
+            output_dir = "./sample_size_optimization_results"
+        os.makedirs(output_dir, exist_ok=True)
+        # Create filename
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        model_safe = self.model_name.replace("/", "_")
+        filename = f"sample_size_{model_safe}_{self.task_name}_layer{self.layer}_{timestamp}.json"
+        filepath = os.path.join(output_dir, filename)
+        # Prepare data for saving
+        save_data = {
+            "model": self.model_name,
+            "task": self.task_name,
+            "layer": self.layer,
+            "test_split": self.test_split,
+            "results": self.results,
+            "optimal_sample_size": self.optimal_sample_size,
+            "timestamp": datetime.now().isoformat(),
+        }
+        # Save to file
+        with open(filepath, "w") as f:
+            json.dump(save_data, f, indent=2)
+        logger.info(f"Results saved to: {filepath}")
+        return filepath
+    def plot_results(self, save_path: Optional[str] = None, show: bool = True) -> None:
+        """
+        Plot accuracy vs sample size curve.
+        Args:
+            save_path: Path to save plot (optional)
+            show: Whether to display the plot
+        """
+        if not self.results:
+            logger.warning("No results to plot")
+            return
+        # Extract data
+        sizes = [r["sample_size"] for r in self.results]
+        accuracies = [r["metrics"]["accuracy"] for r in self.results]
+        f1_scores = [r["metrics"]["f1"] for r in self.results]
+        times = [r["training_time"] for r in self.results]
+        # Create figure with subplots
+        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))
+        # Plot 1: Accuracy and F1 vs Sample Size
+        ax1.plot(sizes, accuracies, "b-o", label="Accuracy", linewidth=2, markersize=8)
+        ax1.plot(sizes, f1_scores, "g--s", label="F1 Score", linewidth=2, markersize=8)
+        # Mark optimal sample size
+        if self.optimal_sample_size:
+            ax1.axvline(
+                self.optimal_sample_size, color="r", linestyle=":", label=f"Optimal: {self.optimal_sample_size}"
+            )
+        ax1.set_xlabel("Sample Size")
+        ax1.set_ylabel("Score")
+        ax1.set_title(
+            f"Classifier Performance vs Sample Size\n{self.model_name} - {self.task_name} - Layer {self.layer}"
+        )
+        ax1.legend()
+        ax1.grid(True, alpha=0.3)
+        # Use linear scale for x-axis
+        ax1.set_xticks(sizes)
+        ax1.set_xticklabels([str(s) for s in sizes])
+        # Plot 2: Training Time vs Sample Size
+        ax2.plot(sizes, times, "r-^", linewidth=2, markersize=8)
+        ax2.set_xlabel("Sample Size")
+        ax2.set_ylabel("Training Time (seconds)")
+        ax2.set_title("Training Time vs Sample Size")
+        ax2.grid(True, alpha=0.3)
+        # Use linear scale for x-axis
+        ax2.set_xticks(sizes)
+        ax2.set_xticklabels([str(s) for s in sizes])
+        plt.tight_layout()
+        if save_path:
+            plt.savefig(save_path, dpi=300, bbox_inches="tight")
+            logger.info(f"Plot saved to: {save_path}")
+        if show:
+            plt.show()
+        plt.close()
+def run_sample_size_optimization(
+    model_name: str,
+    task_name: str = "truthfulqa_mc1",
+    layer: int = 0,
+    token_aggregation: str = "average",
+    threshold: float = 0.5,
+    test_split: float = 0.2,
+    sample_sizes: Optional[List[int]] = None,
+    dataset_limit: Optional[int] = None,
+    device: Optional[str] = None,
+    verbose: bool = False,
+    save_plot: bool = True,
+    save_to_config: bool = True,
+) -> Dict[str, Any]:
+    """
+    Run sample size optimization and optionally save to model config.
+    Args:
+        model_name: Name of the model
+        task_name: Task to optimize for
+        layer: Layer index
+        token_aggregation: Token aggregation method
+        threshold: Detection threshold
+        test_split: Test split ratio
+        sample_sizes: Sample sizes to test
+        dataset_limit: Maximum number of samples to load from dataset
+        device: Computation device
+        verbose: Verbose output
+        save_plot: Whether to save the plot
+        save_to_config: Whether to save to model config
+    Returns:
+        Optimization results dictionary
+    """
+    # Create optimizer
+    optimizer = SampleSizeOptimizer(
+        model_name=model_name,
+        task_name=task_name,
+        layer=layer,
+        token_aggregation=token_aggregation,
+        threshold=threshold,
+        test_split=test_split,
+        sample_sizes=sample_sizes,
+        device=device,
+        verbose=verbose,
+    )
+    # Run optimization with dataset limit
+    optimizer.dataset_limit = dataset_limit
+    results = optimizer.run_optimization()
+    # Save results
+    results_path = optimizer.save_results()
+    # Create plot
+    if save_plot:
+        plot_path = results_path.replace(".json", ".png")
+        optimizer.plot_results(save_path=plot_path, show=False)
+    # Save to model config if requested
+    if save_to_config and optimizer.optimal_sample_size:
+        config_manager = ModelConfigManager()
+        # Load existing config or create new
+        existing_config = config_manager.load_model_config(model_name)
+        if existing_config:
+            # Update existing config
+            if "optimal_sample_sizes" not in existing_config:
+                existing_config["optimal_sample_sizes"] = {}
+            if task_name not in existing_config["optimal_sample_sizes"]:
+                existing_config["optimal_sample_sizes"][task_name] = {}
+            existing_config["optimal_sample_sizes"][task_name][str(layer)] = optimizer.optimal_sample_size
+            # Save updated config
+            config_manager.update_model_config(model_name, existing_config)
+            logger.info(f"Updated model config with optimal sample size: {optimizer.optimal_sample_size}")
+        else:
+            logger.warning("No existing model config found. Run optimize-classification first.")
+    return results

wisent 0.1.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

Potentially problematic release.

wisent 0.1.1py3-none-any.whl → 0.5.1py3-none-any.whl