PyPI - wisent - Versions diffs - 0.5.13__tar.gz → 0.5.15__tar.gz - Mend

wisent 0.5.13__tar.gz → 0.5.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wisent might be problematic. Click here for more details.

Files changed (338) hide show

{wisent-0.5.13 → wisent-0.5.15}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wisent
-Version: 0.5.13
+Version: 0.5.15
 Summary: Monitor and guard against harmful content in language models
 Home-page: https://github.com/yourusername/wisent-activation-guardrails
 Author: Wisent Team
@@ -26,6 +26,8 @@ Requires-Dist: sentence-transformers>=2.0.0
 Requires-Dist: faiss-cpu>=1.7.0
 Provides-Extra: harness
 Requires-Dist: lm-eval==0.4.8; extra == "harness"
+Provides-Extra: cuda
+Requires-Dist: flash-attn>=2.5.0; extra == "cuda"
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier

{wisent-0.5.13 → wisent-0.5.15}/pyproject.toml RENAMED Viewed

@@ -1,9 +1,9 @@
 [tool.pytest.ini_options]
-testpaths = ["tests"]
+testpaths = ["wisent/tests"]
 python_files = ["test_*.py"]
 python_classes = ["Test*"]
 python_functions = ["test_*"]
-addopts = ["-v", "--tb=short", "--ignore=evaluation", "-m", "not slow and not heavy and not docker and not bigcode_required"]
+addopts = ["-v", "--tb=short"]
 markers = [
     "slow: marks tests as slow (deselect with '-m \"not slow\"')",
     "heavy: marks tests as very resource-intensive with large model downloads (deselect with '-m \"not heavy\"')",

{wisent-0.5.13 → wisent-0.5.15}/setup.py RENAMED Viewed

@@ -46,6 +46,9 @@ setup(
         "harness": [
             "lm-eval==0.4.8",
         ],
+        "cuda": [
+            "flash-attn>=2.5.0",
+        ],
     },
     entry_points={
         "console_scripts": [

wisent-0.5.15/wisent/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.5.15"

wisent-0.5.15/wisent/cli.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""
+Programmatic API for running Wisent tasks.
+This module provides a Python API for running tasks that would normally be invoked via CLI.
+"""
+import argparse
+from typing import Dict, Any, Optional
+from wisent.core.cli.tasks import execute_tasks
+def run_task_pipeline(
+    task_name: str,
+    model_name: str,
+    layer: str,
+    training_limit: int,
+    testing_limit: int,
+    seed: int = 42,
+    verbose: bool = False,
+    split_ratio: float = 0.8,
+    limit: Optional[int] = None,
+    steering_mode: bool = False,
+    token_aggregation: str = "average",
+    detection_threshold: float = 0.5,
+    classifier_type: str = "logistic",
+    steering_method: str = "CAA",
+    steering_strength: float = 1.0,
+    token_targeting_strategy: str = "LAST_TOKEN",
+    **kwargs
+) -> Dict[str, Any]:
+    """
+    Run the ``tasks`` command from Python instead of from the command line.
+    The keyword arguments are packed into an ``argparse.Namespace`` shaped like
+    the one the CLI parser would produce, and that namespace is handed to
+    ``execute_tasks``.
+    Args:
+        task_name: Task to run. A string is wrapped in a one-element list;
+            any other value is forwarded unchanged as ``task_names``.
+        model_name: Name or path of the model (stored as ``args.model``).
+        layer: Layer to read activations from. A string is converted with
+            ``int()``; any other value is forwarded unchanged.
+        training_limit: Number of training samples.
+        testing_limit: Number of testing samples.
+        seed: Random seed.
+        verbose: Whether verbose output is requested.
+        split_ratio: Train/test split ratio.
+        limit: Total number of samples to load. When ``None`` it defaults to
+            ``training_limit + testing_limit + 100``.
+        steering_mode: Selects which method-specific options are forwarded
+            (steering options when truthy, classifier options otherwise).
+        token_aggregation: Token aggregation strategy (forwarded in both modes).
+        detection_threshold: Classification threshold (classifier mode only).
+        classifier_type: Classifier type (classifier mode only).
+        steering_method: Steering method (steering mode only).
+        steering_strength: Steering strength (steering mode only).
+        token_targeting_strategy: Token targeting strategy (steering mode only).
+        **kwargs: Only ``device`` is read (default ``'cpu'``); every other
+            extra keyword is ignored.
+    Returns:
+        Whatever ``execute_tasks`` returns. If it returns ``None``, a
+        placeholder dict with ``test_accuracy``, ``test_f1_score`` and
+        ``training_time`` set to ``0.0`` and an empty ``evaluation_results``
+        is returned instead.
+    """
+    # Normalise the two arguments that accept more than one input type.
+    task_names = [task_name] if isinstance(task_name, str) else task_name
+    layer_index = int(layer) if isinstance(layer, str) else layer
+    sample_cap = limit
+    if sample_cap is None:
+        sample_cap = training_limit + testing_limit + 100
+    # Options shared by both modes.
+    settings: Dict[str, Any] = {
+        "task_names": task_names,
+        "model": model_name,
+        "layer": layer_index,
+        "training_limit": training_limit,
+        "testing_limit": testing_limit,
+        "seed": seed,
+        "verbose": verbose,
+        "split_ratio": split_ratio,
+        "limit": sample_cap,
+        "steering_mode": bool(steering_mode),
+    }
+    # Method-specific options: only the set matching the active mode is defined
+    # on the namespace.
+    if steering_mode:
+        settings.update(
+            steering_method=steering_method,
+            steering_strength=steering_strength,
+            token_targeting_strategy=token_targeting_strategy,
+            token_aggregation=token_aggregation,
+        )
+    else:
+        settings.update(
+            token_aggregation=token_aggregation,
+            detection_threshold=detection_threshold,
+            classifier_type=classifier_type,
+        )
+    # Remaining CLI options, pinned to their "not requested" values.
+    settings.update(
+        train_only=False,
+        inference_only=False,
+        save_classifier=None,
+        load_classifier=None,
+        save_steering_vector=None,
+        load_steering_vector=None,
+        output=None,
+        evaluation_report=None,
+        device=kwargs.get('device', 'cpu'),
+    )
+    args = argparse.Namespace(**settings)
+    outcome = execute_tasks(args)
+    if outcome is not None:
+        return outcome
+    # execute_tasks produced nothing: hand back an empty result of the documented shape.
+    return {
+        "test_accuracy": 0.0,
+        "test_f1_score": 0.0,
+        "training_time": 0.0,
+        "evaluation_results": {}
+    }

{wisent-0.5.13 → wisent-0.5.15}/wisent/core/activations/activations_collector.py RENAMED Viewed

@@ -230,17 +230,25 @@ class ActivationCollector:
             # Use model's built-in chat template
             if not hasattr(tokenizer, "apply_chat_template"):
                 raise RuntimeError("Tokenizer has no apply_chat_template; set it up or use a different strategy.")
-            prompt_text = tokenizer.apply_chat_template(
-                [{"role": "user", "content": prompt}],
-                tokenize=False,
-                add_generation_prompt=True,
-            )
-            full_text = tokenizer.apply_chat_template(
-                [{"role": "user", "content": prompt},
-                 {"role": "assistant", "content": response}],
-                tokenize=False,
-                add_generation_prompt=False,
-            )
+            try:
+                prompt_text = tokenizer.apply_chat_template(
+                    [{"role": "user", "content": prompt}],
+                    tokenize=False,
+                    add_generation_prompt=True,
+                )
+                full_text = tokenizer.apply_chat_template(
+                    [{"role": "user", "content": prompt},
+                     {"role": "assistant", "content": response}],
+                    tokenize=False,
+                    add_generation_prompt=False,
+                )
+            except ValueError as e:
+                if "chat_template is not set" in str(e):
+                    # Fallback to direct completion for models without chat templates
+                    prompt_text = prompt
+                    full_text = f"{prompt} {response}"
+                else:
+                    raise
         elif strategy == PromptConstructionStrategy.DIRECT_COMPLETION:
             # Q → good_resp/bad_resp (direct answer)

wisent-0.5.15/wisent/core/agent/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Empty __init__.py to avoid cascading import errors with empty __init__ pattern

wisent-0.5.15/wisent/core/agent/diagnose/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Empty __init__.py to avoid cascading import errors with empty __init__ pattern

{wisent-0.5.13 → wisent-0.5.15}/wisent/core/cli/__init__.py RENAMED Viewed

@@ -9,7 +9,9 @@ from .generate_vector_from_task import execute_generate_vector_from_task
 from .generate_vector_from_synthetic import execute_generate_vector_from_synthetic
 from .optimize_classification import execute_optimize_classification
 from .optimize_steering import execute_optimize_steering
+from .optimize_sample_size import execute_optimize_sample_size
 from .generate_responses import execute_generate_responses
 from .evaluate_responses import execute_evaluate_responses
+from .multi_steer import execute_multi_steer
-__all__ = ['execute_tasks', 'execute_generate_pairs_from_task', 'execute_generate_pairs', 'execute_get_activations', 'execute_create_steering_vector', 'execute_generate_vector_from_task', 'execute_generate_vector_from_synthetic', 'execute_optimize_classification', 'execute_optimize_steering', 'execute_generate_responses', 'execute_evaluate_responses']
+__all__ = ['execute_tasks', 'execute_generate_pairs_from_task', 'execute_generate_pairs', 'execute_get_activations', 'execute_create_steering_vector', 'execute_generate_vector_from_task', 'execute_generate_vector_from_synthetic', 'execute_optimize_classification', 'execute_optimize_steering', 'execute_optimize_sample_size', 'execute_generate_responses', 'execute_evaluate_responses', 'execute_multi_steer']

{wisent-0.5.13 → wisent-0.5.15}/wisent/core/cli/create_steering_vector.py RENAMED Viewed

@@ -94,28 +94,70 @@ def execute_create_steering_vector(args):
         print(f"   ✓ Generated {len(steering_vectors)} steering vectors")
-        # 5. Save steering vectors to JSON
+        # 5. Save steering vectors (format depends on file extension)
         print(f"\n💾 Saving steering vectors to '{args.output}'...")
-        output_data = {
-            'trait_label': trait_label,
-            'model': model,
-            'method': args.method,
-            'normalize': args.normalize,
-            'token_aggregation': token_aggregation,
-            'num_pairs': len(pairs_list),
-            'layers': list(steering_vectors.keys()),
-            'steering_vectors': steering_vectors,
-            'metadata': {
-                'source_file': args.enriched_pairs_file,
-                'creation_time': time.strftime('%Y-%m-%d %H:%M:%S'),
+        os.makedirs(os.path.dirname(os.path.abspath(args.output)) or '.', exist_ok=True)
+        if args.output.endswith('.pt'):
+            # For .pt format: save single-layer vectors for multi-steer compatibility
+            # If multiple layers, save the first one (or could save all and let user specify)
+            if len(steering_vectors) == 1:
+                layer_str = list(steering_vectors.keys())[0]
+                vector_tensor = torch.tensor(steering_vectors[layer_str], dtype=torch.float32)
+                torch.save({
+                    'steering_vector': vector_tensor,
+                    'layer_index': int(layer_str),
+                    'trait_label': trait_label,
+                    'model': model,
+                    'method': args.method,
+                    'normalize': args.normalize,
+                    'token_aggregation': token_aggregation,
+                    'num_pairs': len(pairs_list),
+                    # Legacy keys for backward compatibility
+                    'vector': vector_tensor,
+                    'layer': int(layer_str),
+                }, args.output)
+                print(f"   ✓ Saved steering vector (layer {layer_str}) to: {args.output}")
+            else:
+                # Save multiple layers - save each to separate file
+                for layer_str in steering_vectors.keys():
+                    layer_output = args.output.replace('.pt', f'_layer_{layer_str}.pt')
+                    vector_tensor = torch.tensor(steering_vectors[layer_str], dtype=torch.float32)
+                    torch.save({
+                        'steering_vector': vector_tensor,
+                        'layer_index': int(layer_str),
+                        'trait_label': trait_label,
+                        'model': model,
+                        'method': args.method,
+                        'normalize': args.normalize,
+                        'token_aggregation': token_aggregation,
+                        'num_pairs': len(pairs_list),
+                        # Legacy keys
+                        'vector': vector_tensor,
+                        'layer': int(layer_str),
+                    }, layer_output)
+                    print(f"   ✓ Saved steering vector (layer {layer_str}) to: {layer_output}")
+        else:
+            # JSON format: save all layers together
+            output_data = {
+                'trait_label': trait_label,
+                'model': model,
+                'method': args.method,
+                'normalize': args.normalize,
+                'token_aggregation': token_aggregation,
+                'num_pairs': len(pairs_list),
+                'layers': list(steering_vectors.keys()),
+                'steering_vectors': steering_vectors,
+                'metadata': {
+                    'source_file': args.enriched_pairs_file,
+                    'creation_time': time.strftime('%Y-%m-%d %H:%M:%S'),
+                }
             }
-        }
-        os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True)
-        with open(args.output, 'w') as f:
-            json.dump(output_data, f, indent=2)
+            with open(args.output, 'w') as f:
+                json.dump(output_data, f, indent=2)
-        print(f"   ✓ Saved steering vectors to: {args.output}")
+            print(f"   ✓ Saved steering vectors to: {args.output}")
         # 6. Display statistics
         print(f"\n📈 Steering Vector Statistics:")

{wisent-0.5.13 → wisent-0.5.15}/wisent/core/cli/evaluate_responses.py RENAMED Viewed

@@ -33,8 +33,14 @@ def execute_evaluate_responses(args):
         with open(args.input, 'r') as f:
             input_data = json.load(f)
-        responses = input_data.get('responses', [])
-        task_name = args.task if args.task else input_data.get('task')
+        # Handle both dict format (with 'responses' key) and direct list format
+        if isinstance(input_data, list):
+            responses = input_data
+            task_name = args.task if args.task else "generic"  # Default to generic for list format
+        else:
+            responses = input_data.get('responses', [])
+            task_name = args.task if args.task else input_data.get('task', "generic")
         if not task_name:
             print(f"   ❌ Task name not found in input file and not provided via --task")
             sys.exit(1)
@@ -299,8 +305,8 @@ def execute_evaluate_responses(args):
         output_data = {
             "input_file": args.input,
-            "task": input_data.get('task'),
-            "model": input_data.get('model'),
+            "task": task_name if isinstance(input_data, list) else input_data.get('task'),
+            "model": None if isinstance(input_data, list) else input_data.get('model'),
             "evaluation_type": evaluation_type,
             "evaluator_used": "CodingEvaluator",
             "aggregated_metrics": aggregated_metrics,
@@ -454,8 +460,8 @@ def execute_evaluate_responses(args):
         output_data = {
             "input_file": args.input,
-            "task": input_data.get('task'),
-            "model": input_data.get('model'),
+            "task": task_name if isinstance(input_data, list) else input_data.get('task'),
+            "model": None if isinstance(input_data, list) else input_data.get('model'),
             "evaluation_type": evaluation_type,
             "evaluator_used": "PersonalizationEvaluator",
             "judge_model": judge_model,
@@ -683,8 +689,8 @@ def execute_evaluate_responses(args):
     output_data = {
         "input_file": args.input,
-        "task": input_data.get('task'),
-        "model": input_data.get('model'),
+        "task": task_name if isinstance(input_data, list) else input_data.get('task'),
+        "model": None if isinstance(input_data, list) else input_data.get('model'),
         "evaluation_type": evaluation_type,
         "evaluator_used": evaluator.name,
         "aggregated_metrics": aggregated_metrics,

{wisent-0.5.13 → wisent-0.5.15}/wisent/core/cli/generate_pairs_from_task.py RENAMED Viewed

@@ -23,7 +23,7 @@ def _build_pairs_from_custom_task(task, limit: int | None):
         LiveCodeBenchExtractor as LiveCodeBenchPairExtractor
     )
-    task_name = task.get_name()
+    task_name = task.task_name
     if task_name == "livecodebench":
         # Use the contrastive pair extractor for LiveCodeBench
@@ -61,10 +61,11 @@ def execute_generate_pairs_from_task(args):
         from wisent.core.contrastive_pairs.lm_eval_pairs.lm_task_pairs_generation import (
             lm_build_contrastive_pairs,
         )
+        from wisent.core.task_interface import TaskInterface
-        # Handle both lm-eval tasks (dict) and custom tasks (TaskInterface)
+        # Handle both lm-eval tasks (dict or ConfigurableTask) and custom tasks (TaskInterface)
         if isinstance(task_obj, dict):
-            # lm-eval task
+            # lm-eval task group with subtasks
             if len(task_obj) != 1:
                 keys = ", ".join(sorted(task_obj.keys()))
                 raise ValueError(
@@ -81,14 +82,26 @@ def execute_generate_pairs_from_task(args):
                 lm_eval_task=task,
                 limit=args.limit,
             )
-        else:
-            # Custom task (TaskInterface)
+        elif isinstance(task_obj, TaskInterface):
+            # Custom task (TaskInterface) - only livecodebench for now
             task = task_obj
             pairs_task_name = args.task_name
             # 2. Generate contrastive pairs using custom task interface
             print(f"   🔨 Building contrastive pairs...")
             pairs = _build_pairs_from_custom_task(task, args.limit)
+        else:
+            # Single lm-eval task (ConfigurableTask), not wrapped in dict
+            task = task_obj
+            pairs_task_name = args.task_name
+            # 2. Generate contrastive pairs using lm-eval interface
+            print(f"   🔨 Building contrastive pairs...")
+            pairs = lm_build_contrastive_pairs(
+                task_name=pairs_task_name,
+                lm_eval_task=task,
+                limit=args.limit,
+            )
         print(f"   ✓ Generated {len(pairs)} contrastive pairs")

{wisent-0.5.13 → wisent-0.5.15}/wisent/core/cli/get_activations.py RENAMED Viewed

@@ -42,7 +42,7 @@ def execute_get_activations(args):
             trait_label = 'unknown'
         # Apply limit if specified
-        if args.limit:
+        if hasattr(args, 'limit') and args.limit:
             pairs_list = pairs_list[:args.limit]
         print(f"   ✓ Loaded {len(pairs_list)} pairs")

wisent-0.5.15/wisent/core/cli/multi_steer.py ADDED Viewed

@@ -0,0 +1,108 @@
+"""Multi-steer command execution logic."""
+import sys
+import os
+import torch
+def execute_multi_steer(args):
+    """Execute the multi-steer command: combine steering vectors and optionally generate.
+    Two modes are selected by ``args.vector``:
+    * empty or ``None``: unsteered baseline. When ``args.prompt`` is set, the
+      model is loaded and one sampled response is generated without steering.
+    * otherwise: the vectors are loaded into ``MultiSteering`` and combined,
+      the combined vector is optionally written with ``torch.save``
+      (``args.save_combined``), and when ``args.prompt`` is set a steered
+      response is generated.
+    Args:
+        args: Parsed CLI namespace. Read directly: ``vector``, ``model``,
+            ``device``, ``method`` and ``max_new_tokens``. Read with a
+            fallback: ``prompt``, ``layer``, ``save_combined``, ``verbose``
+            (``False``), ``temperature`` (``0.7``), ``top_p`` (``0.9``) and
+            ``normalize_weights`` (``True``).
+    Raises:
+        SystemExit: With status 1 after any error has been reported on
+            stderr (with a traceback when ``verbose`` is set).
+    """
+    from wisent.core.multi_steering import MultiSteering, MultiSteeringError
+    from wisent.core.models.wisent_model import WisentModel
+    # Read with a default so the error handlers below can never fail with an
+    # AttributeError of their own and hide the real problem.
+    verbose = getattr(args, 'verbose', False)
+    def _fail(message):
+        """Print ``message`` to stderr (plus the active traceback if verbose), then exit 1."""
+        print(message, file=sys.stderr)
+        if verbose:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+    try:
+        prompt = getattr(args, 'prompt', None)
+        temperature = getattr(args, 'temperature', 0.7)
+        top_p = getattr(args, 'top_p', 0.9)
+        # No vectors supplied: unsteered baseline mode
+        if not args.vector:
+            print("\n🎯 Unsteered Baseline Generation Mode")
+            print(f"   Model: {args.model}")
+            print("   No steering vectors provided - generating baseline response")
+            if prompt:
+                print(f"\n🤖 Loading model '{args.model}'...")
+                model = WisentModel(args.model, device=args.device)
+                # Generate WITHOUT steering: a batch holding one single-turn
+                # chat is passed in and its first result is used.
+                response = model.generate(
+                    [[{"role": "user", "content": prompt}]],
+                    max_new_tokens=args.max_new_tokens,
+                    do_sample=True,
+                    temperature=temperature,
+                    top_p=top_p,
+                )[0]
+                print(f"\nUnsteered baseline output:\n{response}\n")
+            else:
+                print("\n⚠️  No prompt provided. Use --prompt to generate output.")
+            print("\n✅ Baseline generation completed successfully!\n")
+            return
+        layer = getattr(args, 'layer', None)
+        print("\n🎯 Multi-Steering Mode")
+        print(f"   Model: {args.model}")
+        print(f"   Layer: {layer}")
+        print(f"   Method: {args.method}")
+        # Initialize multi-steering, then load the vectors to combine
+        multi_steer = MultiSteering(device=args.device, method=args.method)
+        multi_steer.load_vectors(args.vector)
+        # Override the layer if one was given. Compared explicitly instead of
+        # by truthiness, because layer 0 is a legitimate index and would
+        # otherwise be silently ignored.
+        if layer is not None and layer != "":
+            multi_steer.layer = int(layer)
+        # Combine vectors
+        multi_steer.combine_vectors(normalize=getattr(args, 'normalize_weights', True))
+        # Save combined vector if requested
+        save_path = getattr(args, 'save_combined', None)
+        if save_path:
+            print(f"\n💾 Saving combined vector to '{save_path}'...")
+            # dirname is '' for a bare file name; fall back to the current directory
+            os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
+            torch.save({
+                'steering_vector': multi_steer.combined_vector,
+                'layer_index': multi_steer.layer,
+                'method': args.method,
+                'model': args.model,
+                'weights': multi_steer.weights,
+                'num_vectors': len(multi_steer.loaded_vectors),
+                # Legacy keys for backward compatibility
+                'vector': multi_steer.combined_vector,
+                'layer': multi_steer.layer,
+            }, save_path)
+            print(f"   ✓ Combined vector saved to: {save_path}")
+        # If prompt is provided, apply steering and generate
+        if prompt:
+            print(f"\n🤖 Loading model '{args.model}'...")
+            model = WisentModel(args.model, device=args.device)
+            output = multi_steer.apply_steering(
+                model=model,
+                prompt=prompt,
+                max_new_tokens=args.max_new_tokens,
+                temperature=temperature,
+                top_p=top_p
+            )
+            print(f"\nGenerated output:\n{output}\n")
+        print("\n✅ Multi-steering completed successfully!\n")
+    except MultiSteeringError as e:
+        _fail(f"\n❌ Multi-steering error: {str(e)}")
+    except Exception as e:
+        # Top-level CLI boundary: report any failure and exit non-zero.
+        _fail(f"\n❌ Error: {str(e)}")

wisent 0.5.13__tar.gz → 0.5.15__tar.gz

Potentially problematic release.

wisent 0.5.13__tar.gz → 0.5.15__tar.gz