wisent-0.5.14-py3-none-any.whl → wisent-0.5.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wisent might be problematic.

Files changed (60)
  1. wisent/__init__.py +1 -1
  2. wisent/cli.py +114 -0
  3. wisent/core/activations/activations_collector.py +19 -11
  4. wisent/core/cli/__init__.py +3 -1
  5. wisent/core/cli/create_steering_vector.py +60 -18
  6. wisent/core/cli/evaluate_responses.py +14 -8
  7. wisent/core/cli/generate_pairs_from_task.py +18 -5
  8. wisent/core/cli/get_activations.py +1 -1
  9. wisent/core/cli/multi_steer.py +108 -0
  10. wisent/core/cli/optimize_classification.py +187 -285
  11. wisent/core/cli/optimize_sample_size.py +78 -0
  12. wisent/core/cli/optimize_steering.py +354 -53
  13. wisent/core/cli/tasks.py +274 -9
  14. wisent/core/errors/__init__.py +0 -0
  15. wisent/core/errors/error_handler.py +134 -0
  16. wisent/core/evaluators/benchmark_specific/log_likelihoods_evaluator.py +152 -295
  17. wisent/core/evaluators/rotator.py +22 -8
  18. wisent/core/main.py +5 -1
  19. wisent/core/model_persistence.py +4 -19
  20. wisent/core/models/wisent_model.py +11 -3
  21. wisent/core/parser.py +4 -3
  22. wisent/core/parser_arguments/main_parser.py +1 -1
  23. wisent/core/parser_arguments/multi_steer_parser.py +4 -3
  24. wisent/core/parser_arguments/optimize_steering_parser.py +4 -0
  25. wisent/core/sample_size_optimizer_v2.py +1 -1
  26. wisent/core/steering_optimizer.py +2 -2
  27. wisent/tests/__init__.py +0 -0
  28. wisent/tests/examples/__init__.py +0 -0
  29. wisent/tests/examples/cli/__init__.py +0 -0
  30. wisent/tests/examples/cli/activations/__init__.py +0 -0
  31. wisent/tests/examples/cli/activations/test_get_activations.py +127 -0
  32. wisent/tests/examples/cli/classifier/__init__.py +0 -0
  33. wisent/tests/examples/cli/classifier/test_classifier_examples.py +141 -0
  34. wisent/tests/examples/cli/contrastive_pairs/__init__.py +0 -0
  35. wisent/tests/examples/cli/contrastive_pairs/test_generate_pairs.py +89 -0
  36. wisent/tests/examples/cli/evaluation/__init__.py +0 -0
  37. wisent/tests/examples/cli/evaluation/test_evaluation_examples.py +117 -0
  38. wisent/tests/examples/cli/generate/__init__.py +0 -0
  39. wisent/tests/examples/cli/generate/test_generate_with_classifier.py +146 -0
  40. wisent/tests/examples/cli/generate/test_generate_with_steering.py +149 -0
  41. wisent/tests/examples/cli/generate/test_only_generate.py +110 -0
  42. wisent/tests/examples/cli/multi_steering/__init__.py +0 -0
  43. wisent/tests/examples/cli/multi_steering/test_multi_steer_from_trained_vectors.py +210 -0
  44. wisent/tests/examples/cli/multi_steering/test_multi_steer_with_different_parameters.py +205 -0
  45. wisent/tests/examples/cli/multi_steering/test_train_and_multi_steer.py +174 -0
  46. wisent/tests/examples/cli/optimizer/__init__.py +0 -0
  47. wisent/tests/examples/cli/optimizer/test_optimize_sample_size.py +102 -0
  48. wisent/tests/examples/cli/optimizer/test_optimizer_examples.py +59 -0
  49. wisent/tests/examples/cli/steering/__init__.py +0 -0
  50. wisent/tests/examples/cli/steering/test_create_steering_vectors.py +135 -0
  51. wisent/tests/examples/cli/synthetic/__init__.py +0 -0
  52. wisent/tests/examples/cli/synthetic/test_synthetic_pairs.py +45 -0
  53. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/METADATA +3 -1
  54. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/RECORD +59 -29
  55. wisent/core/agent/diagnose/test_synthetic_classifier.py +0 -71
  56. /wisent/core/parser_arguments/{test_nonsense_parser.py → nonsense_parser.py} +0 -0
  57. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/WHEEL +0 -0
  58. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/entry_points.txt +0 -0
  59. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/licenses/LICENSE +0 -0
  60. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/top_level.txt +0 -0
@@ -8,14 +8,15 @@ def setup_multi_steer_parser(parser):
         "--vector",
         type=str,
         action="append",
-        required=True,
+        required=False,
+        default=None,
         metavar="PATH:WEIGHT",
-        help="Path to steering vector and its weight (format: path/to/vector.pt:0.5). Can be specified multiple times.",
+        help="Path to steering vector and its weight (format: path/to/vector.pt:0.5). Can be specified multiple times. If omitted, generates unsteered baseline.",
     )
 
     # Model configuration
     parser.add_argument("--model", type=str, required=True, help="Model name or path")
-    parser.add_argument("--layer", type=int, required=True, help="Layer index to apply combined steering")
+    parser.add_argument("--layer", type=int, required=False, default=None, help="Layer index to apply combined steering (required when using vectors)")
     parser.add_argument("--device", type=str, default=None, help="Device to run on (default: auto-detect)")
 
     # Steering method configuration
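The hunk above makes --vector optional (and --layer optional alongside it), so multi-steer can now run without any steering vectors and produce an unsteered baseline. A self-contained argparse sketch that reproduces only the arguments shown in this hunk illustrates the two accepted invocation shapes; the vector paths, weights, and the multi-steer program name are illustrative assumptions, not taken from the package:

import argparse

# Minimal stand-in for setup_multi_steer_parser after this change:
# --vector and --layer are no longer required.
parser = argparse.ArgumentParser(prog="multi-steer")
parser.add_argument("--vector", type=str, action="append", required=False, default=None,
                    metavar="PATH:WEIGHT",
                    help="Path to steering vector and its weight (format: path/to/vector.pt:0.5)")
parser.add_argument("--model", type=str, required=True, help="Model name or path")
parser.add_argument("--layer", type=int, required=False, default=None,
                    help="Layer index to apply combined steering (required when using vectors)")
parser.add_argument("--device", type=str, default=None, help="Device to run on (default: auto-detect)")

# Two vectors combined with weights 0.7 and 0.3 at layer 3 (paths and weights are hypothetical).
steered = parser.parse_args([
    "--model", "meta-llama/Llama-3.2-1B-Instruct", "--layer", "3",
    "--vector", "vectors/truthfulness.pt:0.7",
    "--vector", "vectors/helpfulness.pt:0.3",
])
assert steered.vector == ["vectors/truthfulness.pt:0.7", "vectors/helpfulness.pt:0.3"]

# No --vector at all: previously an argparse error, now a valid unsteered baseline run.
baseline = parser.parse_args(["--model", "meta-llama/Llama-3.2-1B-Instruct"])
assert baseline.vector is None and baseline.layer is None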
@@ -31,6 +31,10 @@ def setup_steering_optimizer_parser(parser):
         "--max-time-per-task", type=float, default=20.0, help="Time limit per task in minutes (default: 20.0)"
     )
     comprehensive_parser.add_argument("--no-save", action="store_true", help="Don't save results to model config")
+    comprehensive_parser.add_argument("--save-best-vector", type=str, default=None, help="Save the best steering vector for each task to specified directory")
+    comprehensive_parser.add_argument("--save-generation-examples", action="store_true", help="Generate and save example responses (unsteered vs steered)")
+    comprehensive_parser.add_argument("--num-generation-examples", type=int, default=3, help="Number of generation examples per task (default: 3)")
+    comprehensive_parser.add_argument("--save-all-generation-examples", action="store_true", help="Save generation examples for ALL configurations tested (warning: very slow)")
     comprehensive_parser.add_argument("--device", type=str, default=None, help="Device to run on")
     comprehensive_parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
 
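The four new comprehensive-optimizer flags control exporting the best steering vector per task and generating unsteered-vs-steered example responses. A self-contained sketch of how they parse, reproducing only the add_argument calls added in this hunk with plain argparse; the surrounding wisent subparser setup and any required model/task arguments are omitted, and the program name and directory value are assumptions:

import argparse

# Stand-in for the comprehensive optimizer's subparser; only the 0.5.15 additions are shown.
parser = argparse.ArgumentParser(prog="optimize-steering comprehensive")
parser.add_argument("--save-best-vector", type=str, default=None,
                    help="Save the best steering vector for each task to specified directory")
parser.add_argument("--save-generation-examples", action="store_true",
                    help="Generate and save example responses (unsteered vs steered)")
parser.add_argument("--num-generation-examples", type=int, default=3,
                    help="Number of generation examples per task (default: 3)")
parser.add_argument("--save-all-generation-examples", action="store_true",
                    help="Save generation examples for ALL configurations tested (warning: very slow)")

# Example: save the winning vector per task and emit 5 example generations each
# (the output directory name is hypothetical).
args = parser.parse_args([
    "--save-best-vector", "optimized_vectors",
    "--save-generation-examples",
    "--num-generation-examples", "5",
])
print(args.save_best_vector, args.num_generation_examples, args.save_all_generation_examples)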
@@ -12,7 +12,7 @@ from datetime import datetime
 import numpy as np
 import matplotlib.pyplot as plt
 
-from ..cli import run_task_pipeline
+from wisent.cli import run_task_pipeline
 from .model_config_manager import ModelConfigManager
 
 logger = logging.getLogger(__name__)
@@ -497,8 +497,8 @@ class SteeringOptimizer:
         for strength in strengths:
             try:
                 # Run evaluation with this strength
-                from ..cli import run_task_pipeline
-
+                from wisent.cli import run_task_pipeline
+
                 # Build kwargs for run_task_pipeline
                 pipeline_kwargs = {
                     'task_name': task_name,
@@ -0,0 +1,127 @@
+"""
+Tests for activation extraction examples.
+
+Validates get-activations command with contrastive pairs.
+"""
+
+import subprocess
+import pytest
+import tempfile
+import os
+import json
+
+
+def create_test_pairs_file(filepath):
+    """Create a simple test pairs JSON file."""
+    pairs = [
+        {
+            "prompt": "What color is the sky?",
+            "positive_response": {
+                "model_response": "The sky is blue."
+            },
+            "negative_response": {
+                "model_response": "The sky is green."
+            },
+            "label": "truthfulness"
+        },
+        {
+            "prompt": "What is the chemical formula for water?",
+            "positive_response": {
+                "model_response": "Water is H2O."
+            },
+            "negative_response": {
+                "model_response": "Water is CO2."
+            },
+            "label": "truthfulness"
+        }
+    ]
+    with open(filepath, 'w') as f:
+        json.dump(pairs, f)
+
+
+def test_get_activations_from_pairs():
+    """Test extracting activations from contrastive pairs."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        input_file = os.path.join(tmpdir, "pairs.json")
+        output_file = os.path.join(tmpdir, "pairs_with_activations.json")
+
+        # Create test pairs
+        create_test_pairs_file(input_file)
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "get-activations",
+                input_file,
+                "--output", output_file,
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--layers", "3",
+                "--token-aggregation", "average",
+                "--device", "cpu",
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+        assert os.path.exists(output_file), "Output file not created"
+
+
+def test_get_activations_multiple_layers():
+    """Test extracting activations from multiple layers."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        input_file = os.path.join(tmpdir, "pairs.json")
+        output_file = os.path.join(tmpdir, "multilayer_activations.json")
+
+        create_test_pairs_file(input_file)
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "get-activations",
+                input_file,
+                "--output", output_file,
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--layers", "2,3,4",
+                "--token-aggregation", "average",
+                "--device", "cpu",
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+        assert os.path.exists(output_file), "Output file not created"
+
+
+def test_get_activations_different_aggregation():
+    """Test different token aggregation strategies."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        input_file = os.path.join(tmpdir, "pairs.json")
+        output_file = os.path.join(tmpdir, "final_token_activations.json")
+
+        create_test_pairs_file(input_file)
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "get-activations",
+                input_file,
+                "--output", output_file,
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--layers", "3",
+                "--token-aggregation", "final",
+                "--device", "cpu",
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
@@ -0,0 +1,141 @@
+"""
+Tests for classifier examples.
+
+Validates classifier training, loading, and evaluation workflows.
+"""
+
+import subprocess
+import pytest
+import tempfile
+import os
+
+
+def test_train_classifier_and_save():
+    """Test training a classifier and saving it."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        classifier_path = os.path.join(tmpdir, "classifier.pt")
+        output_dir = os.path.join(tmpdir, "training")
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "tasks", "boolq",
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--layer", "3",
+                "--classifier-type", "logistic",
+                "--limit", "20",
+                "--save-classifier", classifier_path,
+                "--output", output_dir,
+                "--device", "cpu"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+        assert os.path.exists(classifier_path), "Classifier not saved"
+
+
+def test_use_pretrained_classifier():
+    """Test loading and using a pretrained classifier."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        classifier_path = os.path.join(tmpdir, "classifier.pt")
+        training_output = os.path.join(tmpdir, "training")
+        inference_output = os.path.join(tmpdir, "inference")
+
+        # Train classifier first
+        train_result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "tasks", "boolq",
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--layer", "3",
+                "--classifier-type", "logistic",
+                "--limit", "20",
+                "--save-classifier", classifier_path,
+                "--output", training_output,
+                "--device", "cpu"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert train_result.returncode == 0
+
+        # Use pretrained classifier
+        inference_result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "tasks", "boolq",
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--layer", "3",
+                "--load-classifier", classifier_path,
+                "--inference-only",
+                "--testing-limit", "10",
+                "--output", inference_output,
+                "--device", "cpu"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=600
+        )
+
+        assert inference_result.returncode == 0, f"Inference failed: {inference_result.stderr}"
+
+
+def test_run_and_evaluate_on_benchmark():
+    """Test training and evaluating classifier on benchmark."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        output_dir = os.path.join(tmpdir, "benchmark")
+        report_file = os.path.join(output_dir, "report.json")
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "tasks", "boolq",
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--layer", "3",
+                "--classifier-type", "logistic",
+                "--training-limit", "20",
+                "--testing-limit", "10",
+                "--token-aggregation", "average",
+                "--detection-threshold", "0.6",
+                "--output", output_dir,
+                "--evaluation-report", report_file,
+                "--device", "cpu",
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+
+
+def test_classifier_with_mlp():
+    """Test training MLP classifier (not just logistic)."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        classifier_path = os.path.join(tmpdir, "mlp_classifier.pt")
+        output_dir = os.path.join(tmpdir, "training")
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "tasks", "boolq",
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--layer", "3",
+                "--classifier-type", "mlp",
+                "--limit", "20",
+                "--save-classifier", classifier_path,
+                "--output", output_dir,
+                "--device", "cpu"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+        assert os.path.exists(classifier_path), "MLP classifier not saved"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
@@ -0,0 +1,89 @@
+"""
+Tests for contrastive pairs generation examples.
+
+Validates pair generation from tasks and synthetic generation.
+"""
+
+import subprocess
+import pytest
+import tempfile
+import os
+import json
+
+
+def test_generate_pairs_from_task():
+    """Test generating contrastive pairs from lm-eval task."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        output_file = os.path.join(tmpdir, "pairs.json")
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "generate-pairs-from-task", "boolq",
+                "--output", output_file,
+                "--limit", "10",
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+        assert os.path.exists(output_file), "Output file not created"
+
+        # Verify JSON format
+        with open(output_file, 'r') as f:
+            data = json.load(f)
+            assert isinstance(data, (list, dict)), "Output should be JSON"
+
+
+def test_generate_synthetic_pairs():
+    """Test generating synthetic contrastive pairs."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        output_file = os.path.join(tmpdir, "synthetic_pairs.json")
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "generate-pairs",
+                "--trait", "truthfulness",
+                "--num-pairs", "5",
+                "--output", output_file,
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--similarity-threshold", "0.8",
+                "--device", "cpu",
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+        assert os.path.exists(output_file), "Output file not created"
+
+
+def test_generate_synthetic_pairs_different_trait():
+    """Test synthetic pair generation with different trait."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        output_file = os.path.join(tmpdir, "helpfulness_pairs.json")
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "generate-pairs",
+                "--trait", "being helpful and informative",
+                "--num-pairs", "5",
+                "--output", output_file,
+                "--model", "meta-llama/Llama-3.2-1B-Instruct",
+                "--device", "cpu",
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
@@ -0,0 +1,117 @@
+"""
+Tests for evaluation examples.
+
+Validates response evaluation and personalization assessment.
+"""
+
+import subprocess
+import pytest
+import tempfile
+import os
+import json
+
+
+def create_test_responses_file(filepath):
+    """Create a test responses JSON file."""
+    responses = [
+        {
+            "question": "What is 2+2?",
+            "response": "4",
+            "expected": "4",
+            "choices": ["3", "4", "5", "6"]
+        },
+        {
+            "question": "What is the capital of France?",
+            "response": "Paris",
+            "expected": "Paris",
+            "choices": ["London", "Paris", "Berlin", "Rome"]
+        }
+    ]
+    with open(filepath, 'w') as f:
+        json.dump(responses, f)
+
+
+def test_generate_responses_from_task():
+    """Test generating responses from a task."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        output_file = os.path.join(tmpdir, "responses.json")
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "generate-responses",
+                "meta-llama/Llama-3.2-1B-Instruct",
+                "--task", "boolq",
+                "--num-questions", "3",
+                "--max-new-tokens", "50",
+                "--temperature", "0.7",
+                "--device", "cpu",
+                "--output", output_file,
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+        assert os.path.exists(output_file), "Output file not created"
+
+
+def test_evaluate_generated_responses():
+    """Test evaluating generated responses."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        input_file = os.path.join(tmpdir, "responses.json")
+        output_file = os.path.join(tmpdir, "evaluation.json")
+
+        # Create test responses
+        create_test_responses_file(input_file)
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "evaluate-responses",
+                "--input", input_file,
+                "--output", output_file,
+                "--task", "boolq",
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+        assert os.path.exists(output_file), "Output file not created"
+
+
+def test_evaluate_personalization():
+    """Test evaluating personalization responses."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        input_file = os.path.join(tmpdir, "personalization.json")
+        output_file = os.path.join(tmpdir, "evaluation.json")
+
+        # Create test responses
+        responses = [
+            {"question": "Test Q", "response": "Test A", "trait": "helpful"}
+        ]
+        with open(input_file, 'w') as f:
+            json.dump(responses, f)
+
+        result = subprocess.run(
+            [
+                "python", "-m", "wisent.core.main", "evaluate-responses",
+                "--input", input_file,
+                "--output", output_file,
+                "--task", "personalization",
+                "--trait", "helpful",
+                "--verbose"
+            ],
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])