PyPI - wisent - Versions diffs - 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl - Mend

wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (725) hide show

wisent/core/cli/optimize_weights.py CHANGED Viewed

@@ -23,11 +23,47 @@ import os
 import re
 import tempfile
 import time
+import subprocess
 from dataclasses import dataclass
 from typing import Any, Callable
 import torch
+def upload_to_s3(local_path: str, s3_bucket: str, s3_key: str) -> bool:
+    """Upload a file or directory to S3."""
+    try:
+        if os.path.isdir(local_path):
+            cmd = ["aws", "s3", "sync", local_path, f"s3://{s3_bucket}/{s3_key}", "--quiet"]
+        else:
+            cmd = ["aws", "s3", "cp", local_path, f"s3://{s3_bucket}/{s3_key}", "--quiet"]
+        subprocess.run(cmd, check=True, capture_output=True)
+        return True
+    except Exception as e:
+        print(f"   Warning: S3 upload failed: {e}")
+        return False
+def download_from_s3(s3_bucket: str, s3_key: str, local_path: str) -> bool:
+    """Download a file or directory from S3."""
+    try:
+        s3_path = f"s3://{s3_bucket}/{s3_key}"
+        # Check if it exists
+        check_cmd = ["aws", "s3", "ls", s3_path]
+        result = subprocess.run(check_cmd, capture_output=True)
+        if result.returncode != 0:
+            return False
+        # Download
+        if s3_key.endswith('/'):
+            cmd = ["aws", "s3", "sync", s3_path, local_path, "--quiet"]
+        else:
+            os.makedirs(os.path.dirname(local_path) or '.', exist_ok=True)
+            cmd = ["aws", "s3", "cp", s3_path, local_path, "--quiet"]
+        subprocess.run(cmd, check=True, capture_output=True)
+        return True
+    except Exception:
+        return False
 from wisent.core.errors import UnknownTypeError, InsufficientDataError
 from wisent.core.models.wisent_model import WisentModel
@@ -204,9 +240,19 @@ def execute_optimize_weights(args):
     # Use checkpointing if checkpoint path is provided
     checkpoint_path = getattr(args, 'checkpoint', None)
     checkpoint_interval = getattr(args, 'checkpoint_interval', 5)
+    s3_bucket = getattr(args, 's3_bucket', None)
-    if checkpoint_path:
-        print(f"   Checkpointing enabled: {checkpoint_path}")
+    # Generate S3 key prefix for this optimization run
+    s3_key_prefix = None
+    if s3_bucket:
+        task_name = args.task.replace(',', '_')[:50] if args.task else (args.trait or 'unknown')[:50]
+        s3_key_prefix = f"optimization-checkpoints/{task_name}/{time.strftime('%Y%m%d-%H%M%S')}"
+        print(f"   S3 bucket: {s3_bucket}")
+        print(f"   S3 key prefix: {s3_key_prefix}")
+    if checkpoint_path or s3_bucket:
+        if checkpoint_path:
+            print(f"   Checkpointing enabled: {checkpoint_path}")
         print(f"   Checkpoint interval: every {checkpoint_interval} trials\n")
         result = optimizer.optimize_with_checkpointing(
             hpo_config,
@@ -214,6 +260,8 @@ def execute_optimize_weights(args):
             checkpoint_interval=checkpoint_interval,
             output_dir=args.output_dir,
             tokenizer=tokenizer,
+            s3_bucket=s3_bucket,
+            s3_key_prefix=s3_key_prefix,
         )
     else:
         result = optimizer.optimize(hpo_config)
@@ -270,6 +318,17 @@ def execute_optimize_weights(args):
     print(f"   Model saved")
     print(f"   Metadata saved to optimization_metadata.json")
+    # Upload to S3 if --s3-bucket is provided
+    s3_bucket = getattr(args, 's3_bucket', None)
+    if s3_bucket:
+        task_name = args.task.replace(',', '_')[:50] if args.task else (args.trait or 'unknown')[:50]
+        s3_key = f"optimization-results/{task_name}/{time.strftime('%Y%m%d-%H%M%S')}"
+        print(f"\n   Uploading results to s3://{s3_bucket}/{s3_key}/...")
+        if upload_to_s3(args.output_dir, s3_bucket, s3_key):
+            print(f"   ✓ Results uploaded to S3")
+        else:
+            print(f"   ✗ S3 upload failed")
     # Save all trials if requested
     if args.save_trials:
         trials_data = [
@@ -569,10 +628,10 @@ def _generate_steering_vectors(args, num_pairs: int, num_layers: int = None) ->
             # Use .pt format for train_unified_goodness output
             temp_output_pt = temp_output.replace('.json', '.pt')
-            # Parse layers - if 'all' or None, use None to let train_unified_goodness pick middle layer
+            # Parse layers - if 'all' or None, use None to let train_unified_goodness pick ALL layers
             layers_arg = args.layers if hasattr(args, 'layers') else None
             if layers_arg == 'all' or layers_arg is None:
-                layers_arg = None  # Will use middle layer
+                layers_arg = None  # Will use ALL layers (train_unified_goodness default)
             vector_args = Namespace(
                 task=args.task,  # Pass comma-separated benchmarks
@@ -907,8 +966,8 @@ def _create_custom_evaluator(args, model_name: str) -> Callable:
             )
             response = result[0] if result else ""
-            # Score using custom evaluator
-            score = custom_eval(response)
+            # Score using custom evaluator - pass prompt for coherence checking
+            score = custom_eval(response, prompt=prompt_text)
             if isinstance(score, dict):
                 # Take the primary score (first value or 'score' key)
                 score = score.get('score', list(score.values())[0])

wisent/core/cli/steering_method_trainer.py CHANGED Viewed

@@ -12,7 +12,8 @@ import torch
 from wisent.core.steering_methods.registry import SteeringMethodRegistry
 from wisent.core.activations.activations_collector import ActivationCollector
-from wisent.core.activations.core.atoms import ActivationAggregationStrategy, LayerActivations
+from wisent.core.activations.extraction_strategy import ExtractionStrategy
+from wisent.core.activations.core.atoms import LayerActivations
 from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
 from wisent.core.models.core.atoms import SteeringPlan, SteeringVector
@@ -141,7 +142,7 @@ def collect_activations_for_pair_set(
     model,
     pair_set: ContrastivePairSet,
     layers: List[str],
-    aggregation: ActivationAggregationStrategy = ActivationAggregationStrategy.LAST_TOKEN,
+    aggregation: ExtractionStrategy = ExtractionStrategy.CHAT_LAST,
 ) -> ContrastivePairSet:
     """
     Collect activations for all pairs in a ContrastivePairSet.
@@ -353,7 +354,7 @@ class UnifiedSteeringTrainer:
         self,
         pair_set: ContrastivePairSet,
         layers: List[str],
-        aggregation: ActivationAggregationStrategy = ActivationAggregationStrategy.LAST_TOKEN,
+        aggregation: ExtractionStrategy = ExtractionStrategy.CHAT_LAST,
     ) -> LayerActivations:
         """Collect activations and train in one step."""
         # Collect activations
@@ -551,7 +552,7 @@ def get_optimal_steering_plan(
     task_name: str,
     train_pairs: "ContrastivePairSet",
     method: str = "*",
-    aggregation: ActivationAggregationStrategy = ActivationAggregationStrategy.LAST_TOKEN,
+    aggregation: ExtractionStrategy = ExtractionStrategy.CHAT_LAST,
 ) -> Optional[Tuple["SteeringPlan", Dict[str, Any]]]:
     """
     Load optimal config and create a ready-to-use steering plan.

wisent/core/cli/steering_search_space.py CHANGED Viewed

@@ -45,11 +45,13 @@ class SensorLayerConfig(str, Enum):
 class BaseSearchSpace:
     """Base search space common to all methods."""
-    layers: List[int] = field(default_factory=lambda: [4, 6, 8, 10, 12])
-    strengths: List[float] = field(default_factory=lambda: [0.5, 1.0, 1.5, 2.0])
-    strategies: List[str] = field(default_factory=lambda: ["constant", "initial_only", "diminishing"])
-    token_aggregations: List[str] = field(default_factory=lambda: ["last_token", "mean_pooling"])
-    prompt_constructions: List[str] = field(default_factory=lambda: ["chat_template", "direct_completion"])
+    # layers MUST be set by get_search_space() to all layers (0 to num_layers-1)
+    # Empty default ensures it's always explicitly set
+    layers: List[int] = field(default_factory=list)
+    strengths: List[float] = field(default_factory=lambda: [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 2.0])
+    strategies: List[str] = field(default_factory=lambda: ["constant", "initial_only", "diminishing", "increasing", "gaussian"])
+    token_aggregations: List[str] = field(default_factory=lambda: ["last_token", "mean_pooling", "first_token", "max_pooling", "continuation_token"])
+    prompt_constructions: List[str] = field(default_factory=lambda: ["chat_template", "direct_completion", "multiple_choice", "role_playing", "instruction_following"])
     def get_total_configs(self) -> int:
         return (
@@ -320,19 +322,22 @@ def get_search_space(method_name: str, num_layers: int, quick: bool = False) ->
     """
     method = method_name.upper()
-    # Compute layer candidates based on model size
+    # Full search uses ALL layers
+    all_layers = list(range(num_layers))
+    # Quick search uses subset of layers
     if num_layers > 20:
-        default_layers = list(range(num_layers // 2, num_layers - 2, 2))
+        quick_layers = list(range(num_layers // 2, num_layers - 2, 2))
     elif num_layers > 12:
-        default_layers = [4, 6, 8, 10, 12]
+        quick_layers = [4, 6, 8, 10, 12]
     else:
-        default_layers = list(range(2, num_layers, 2))
+        quick_layers = list(range(2, num_layers, 2))
     if quick:
         # Reduced search space for quick testing
         if method == "CAA":
             return CAASearchSpace(
-                layers=default_layers[:3],
+                layers=quick_layers[:3],
                 strengths=[0.5, 1.0, 1.5],
                 strategies=["constant"],
                 token_aggregations=["last_token"],
@@ -340,7 +345,7 @@ def get_search_space(method_name: str, num_layers: int, quick: bool = False) ->
             )
         elif method == "PRISM":
             return PRISMSearchSpace(
-                layers=default_layers[:3],
+                layers=quick_layers[:3],
                 strengths=[0.5, 1.0, 1.5],
                 strategies=["constant"],
                 token_aggregations=["last_token"],
@@ -380,18 +385,18 @@ def get_search_space(method_name: str, num_layers: int, quick: bool = False) ->
                 optimization_steps=[100],
             )
-    # Full search space
+    # Full search space - uses ALL layers
     if method == "CAA":
-        return CAASearchSpace(layers=default_layers)
+        return CAASearchSpace(layers=all_layers)
     elif method == "PRISM":
-        return PRISMSearchSpace(layers=default_layers)
+        return PRISMSearchSpace(layers=all_layers)
     elif method == "PULSE":
         return PULSESearchSpace(strengths=[0.5, 1.0, 1.5, 2.0])
     elif method == "TITAN":
         return TITANSearchSpace(strengths=[0.5, 1.0, 1.5, 2.0])
     else:
         # Default to CAA search space
-        return CAASearchSpace(layers=default_layers)
+        return CAASearchSpace(layers=all_layers)
 def get_search_space_from_args(method_name: str, args, num_layers: int) -> BaseSearchSpace:

wisent/core/cli/tasks.py CHANGED Viewed

@@ -14,8 +14,8 @@ def execute_tasks(args):
     from wisent.core.data_loaders.loaders.lm_loader import LMEvalDataLoader
     from wisent.core.models.wisent_model import WisentModel
     from wisent.core.activations.activations_collector import ActivationCollector
-    from wisent.core.activations.core.atoms import ActivationAggregationStrategy
-    from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
+    from wisent.core.activations.extraction_strategy import ExtractionStrategy
     from wisent.core.classifiers.classifiers.models.logistic import LogisticClassifier
     from wisent.core.classifiers.classifiers.models.mlp import MLPClassifier
     from wisent.core.classifiers.classifiers.core.atoms import ClassifierTrainConfig
@@ -161,12 +161,10 @@ def execute_tasks(args):
         # Determine layer range
         if hasattr(args, 'optimize_layers') and args.optimize_layers == 'all':
-            layer_range = list(range(1, num_layers + 1))
+            layer_range = list(range(num_layers))
         else:
-            # Default: test middle 50% of layers
-            start = num_layers // 4
-            end = (3 * num_layers) // 4
-            layer_range = list(range(start, end + 1))
+            # Default: test ALL layers (0-indexed)
+            layer_range = list(range(num_layers))
         # Create optimization config
         config = OptimizationConfig(
@@ -418,32 +416,9 @@ def execute_tasks(args):
     # 5. Collect activations for all pairs
     collector = ActivationCollector(model=model, store_device="cpu")
-    # Map parser values to enum members
-    aggregation_map = {
-        'average': 'MEAN_POOLING',
-        'final': 'LAST_TOKEN',
-        'first': 'FIRST_TOKEN',
-        'max': 'MAX_POOLING',
-        'min': 'MAX_POOLING',  # Fallback to MAX_POOLING for min
-        'max_score': 'MEAN_POOLING',  # Will use mean for training, but max token score for inference
-    }
-    aggregation_key = aggregation_map.get(args.token_aggregation.lower(), 'MEAN_POOLING')
-    aggregation_strategy = ActivationAggregationStrategy[aggregation_key]
-    use_max_token_score = args.token_aggregation.lower() == 'max_score'
-    # Map prompt construction strategy from CLI to enum
-    prompt_strategy_map = {
-        'multiple_choice': PromptConstructionStrategy.MULTIPLE_CHOICE,
-        'role_playing': PromptConstructionStrategy.ROLE_PLAYING,
-        'direct_completion': PromptConstructionStrategy.DIRECT_COMPLETION,
-        'instruction_following': PromptConstructionStrategy.INSTRUCTION_FOLLOWING,
-        'chat_template': PromptConstructionStrategy.CHAT_TEMPLATE,
-    }
-    prompt_strategy = prompt_strategy_map.get(
-        getattr(args, 'prompt_construction_strategy', 'chat_template'),
-        PromptConstructionStrategy.CHAT_TEMPLATE
-    )
-    print(f"   Prompt construction strategy: {prompt_strategy.value}")
+    # Get extraction strategy from args (already an ExtractionStrategy value string)
+    extraction_strategy = ExtractionStrategy(getattr(args, 'extraction_strategy', 'chat_last'))
+    print(f"   Extraction strategy: {extraction_strategy.value}")
     positive_activations = []
     negative_activations = []
@@ -456,13 +431,9 @@ def execute_tasks(args):
             print(f"   Processing pair {i+1}/{len(pair_set.pairs)}...", end='\r')
         # Collect for positive (correct) response
-        updated_pair = collector.collect_for_pair(
-            pair,
+        updated_pair = collector.collect(
+            pair, strategy=extraction_strategy,
             layers=[layer_str],
-            aggregation=aggregation_strategy,
-            return_full_sequence=False,
-            normalize_layers=False,
-            prompt_strategy=prompt_strategy
         )
         # Extract activations from positive and negative responses
@@ -659,10 +630,8 @@ def execute_tasks(args):
         # Collect activation - ActivationCollector will re-run the model with prompt+response
         # First, collect with full sequence to get token-by-token activations
-        collected_full = gen_collector.collect_for_pair(
-            temp_pair,
-            layers=[layer_str],
-            aggregation=aggregation_strategy,
+        collected_full = gen_collector.collect(
+            temp_pair, strategy=aggregation_strategy,
             return_full_sequence=True,
             normalize_layers=False,
             prompt_strategy=prompt_strategy
@@ -686,6 +655,8 @@ def execute_tasks(args):
                         activation_agg = activation_full_seq[0]
                     elif aggregation_strategy.name == 'MAX_POOLING':
                         activation_agg = activation_full_seq.max(dim=0)[0]
+                    elif aggregation_strategy.name == 'MIN_POOLING':
+                        activation_agg = activation_full_seq.min(dim=0)[0]
                     else:
                         # Default to mean
                         activation_agg = activation_full_seq.mean(dim=0)

wisent/core/cli/train_unified_goodness.py CHANGED Viewed

@@ -79,8 +79,8 @@ def execute_train_unified_goodness(args):
     from wisent.core.data_loaders.loaders.lm_loader import LMEvalDataLoader
     from wisent.core.models.wisent_model import WisentModel
     from wisent.core.activations.activations_collector import ActivationCollector
-    from wisent.core.activations.core.atoms import ActivationAggregationStrategy
-    from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
+    from wisent.core.activations.extraction_strategy import ExtractionStrategy
     from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
     from wisent.core.contrastive_pairs.lm_eval_pairs.lm_task_pairs_generation import lm_build_contrastive_pairs
     from wisent.core.steering_methods.methods.caa import CAAMethod
@@ -163,10 +163,9 @@ def execute_train_unified_goodness(args):
             else:
                 layers.append(part)
     else:
-        # Use middle layer by default
-        middle_layer = model.num_layers // 2
-        layers = [str(middle_layer)]
-        print(f"   Using middle layer: {middle_layer}")
+        # Use ALL layers by default
+        layers = [str(i) for i in range(model.num_layers)]
+        print(f"   Using ALL layers: 0 to {model.num_layers - 1}")
     print(f"   ✓ Target layers: {layers}")
@@ -322,28 +321,28 @@ def execute_train_unified_goodness(args):
     # Map aggregation strategy
     aggregation_map = {
-        'average': ActivationAggregationStrategy.MEAN_POOLING,
-        'final': ActivationAggregationStrategy.LAST_TOKEN,
-        'first': ActivationAggregationStrategy.FIRST_TOKEN,
-        'max': ActivationAggregationStrategy.MAX_POOLING,
-        'continuation': ActivationAggregationStrategy.CONTINUATION_TOKEN,
+        'average': ExtractionStrategy.CHAT_MEAN,
+        'final': ExtractionStrategy.CHAT_LAST,
+        'first': ExtractionStrategy.CHAT_FIRST,
+        'max': ExtractionStrategy.CHAT_MAX_NORM,
+        'continuation': ExtractionStrategy.CHAT_GEN_POINT,
     }
     aggregation_strategy = aggregation_map.get(
         args.token_aggregation,
-        ActivationAggregationStrategy.CONTINUATION_TOKEN
+        ExtractionStrategy.CHAT_GEN_POINT
     )
     # Map prompt strategy
     prompt_strategy_map = {
-        'chat_template': PromptConstructionStrategy.CHAT_TEMPLATE,
-        'direct_completion': PromptConstructionStrategy.DIRECT_COMPLETION,
-        'instruction_following': PromptConstructionStrategy.INSTRUCTION_FOLLOWING,
-        'multiple_choice': PromptConstructionStrategy.MULTIPLE_CHOICE,
-        'role_playing': PromptConstructionStrategy.ROLE_PLAYING,
+        'chat_template': ExtractionStrategy.CHAT_LAST,
+        'direct_completion': ExtractionStrategy.CHAT_LAST,
+        'instruction_following': ExtractionStrategy.CHAT_LAST,
+        'multiple_choice': ExtractionStrategy.MC_BALANCED,
+        'role_playing': ExtractionStrategy.ROLE_PLAY,
     }
     prompt_strategy = prompt_strategy_map.get(
         args.prompt_strategy,
-        PromptConstructionStrategy.CHAT_TEMPLATE
+        ExtractionStrategy.CHAT_LAST
     )
     # Try to load activations from checkpoint

wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl

wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl