wisent 0.7.701__py3-none-any.whl → 0.7.901__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (330)
  1. wisent/__init__.py +1 -1
  2. wisent/core/activations/activation_cache.py +393 -0
  3. wisent/core/activations/activations.py +3 -3
  4. wisent/core/activations/activations_collector.py +9 -5
  5. wisent/core/activations/classifier_inference_strategy.py +12 -11
  6. wisent/core/activations/extraction_strategy.py +256 -84
  7. wisent/core/classifiers/classifiers/core/atoms.py +3 -2
  8. wisent/core/cli/__init__.py +2 -1
  9. wisent/core/cli/agent/apply_steering.py +5 -7
  10. wisent/core/cli/agent/train_classifier.py +19 -7
  11. wisent/core/cli/check_linearity.py +35 -3
  12. wisent/core/cli/cluster_benchmarks.py +4 -6
  13. wisent/core/cli/create_steering_vector.py +6 -4
  14. wisent/core/cli/diagnose_vectors.py +7 -4
  15. wisent/core/cli/estimate_unified_goodness_time.py +6 -4
  16. wisent/core/cli/generate_pairs_from_task.py +9 -56
  17. wisent/core/cli/geometry_search.py +137 -0
  18. wisent/core/cli/get_activations.py +1 -1
  19. wisent/core/cli/method_optimizer.py +4 -3
  20. wisent/core/cli/modify_weights.py +3 -2
  21. wisent/core/cli/optimize_sample_size.py +1 -1
  22. wisent/core/cli/optimize_steering.py +14 -16
  23. wisent/core/cli/optimize_weights.py +2 -1
  24. wisent/core/cli/preview_pairs.py +203 -0
  25. wisent/core/cli/steering_method_trainer.py +3 -3
  26. wisent/core/cli/tasks.py +19 -76
  27. wisent/core/cli/train_unified_goodness.py +3 -3
  28. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +4 -4
  29. wisent/core/contrastive_pairs/diagnostics/linearity.py +7 -0
  30. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentic_search.py +37 -347
  31. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aider_polyglot.py +113 -136
  32. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codeforces.py +2 -12
  33. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/coding_benchmarks.py +124 -504
  34. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/faithbench.py +40 -63
  35. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flames.py +46 -89
  36. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flores.py +15 -4
  37. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/frames.py +36 -20
  38. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hallucinations_leaderboard.py +3 -45
  39. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livemathbench.py +42 -4
  40. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/longform_writing.py +2 -112
  41. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/math500.py +39 -4
  42. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/medium_priority_benchmarks.py +475 -525
  43. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mercury.py +65 -42
  44. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/olympiadbench.py +2 -12
  45. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/planbench.py +78 -219
  46. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/polymath.py +37 -4
  47. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/recode.py +84 -69
  48. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/refusalbench.py +168 -160
  49. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/simpleqa.py +44 -25
  50. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tau_bench.py +3 -103
  51. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolbench.py +3 -97
  52. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolemu.py +48 -182
  53. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +3 -0
  54. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +19 -1
  55. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aclue.py +1 -3
  56. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench.py +1 -3
  57. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench_hard.py +1 -3
  58. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/advanced.py +2 -4
  59. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aexams.py +1 -3
  60. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrimmlu.py +1 -3
  61. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrixnli.py +2 -2
  62. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabculture.py +1 -3
  63. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic.py +1 -3
  64. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_exams.py +1 -3
  65. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_complete.py +1 -3
  66. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_light.py +1 -3
  67. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabicmmlu.py +1 -3
  68. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aradice.py +1 -3
  69. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc.py +1 -3
  70. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +1 -2
  71. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +1 -2
  72. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +2 -2
  73. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +2 -2
  74. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/babi.py +36 -2
  75. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench.py +1 -3
  76. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bbq.py +1 -3
  77. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/belebele.py +1 -3
  78. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/benchmarks.py +1 -3
  79. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bertaqa.py +1 -3
  80. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhs.py +1 -3
  81. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhtc.py +3 -5
  82. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp.py +1 -3
  83. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp_nl.py +1 -3
  84. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +2 -2
  85. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/c4.py +1 -3
  86. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cabbq.py +1 -3
  87. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/careqa.py +1 -3
  88. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench.py +1 -3
  89. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalanqa.py +1 -3
  90. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catcola.py +1 -3
  91. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +2 -2
  92. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval.py +1 -3
  93. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval_valid.py +1 -3
  94. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chain.py +1 -3
  95. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chartqa.py +1 -3
  96. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/claim.py +1 -3
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/click.py +1 -3
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cmmlu.py +1 -3
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cnn.py +1 -3
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cocoteros.py +1 -3
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coedit.py +1 -3
  102. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense.py +1 -3
  103. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense_qa.py +1 -3
  104. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +2 -2
  105. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copal_id.py +1 -3
  106. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +3 -4
  107. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/csatqa.py +1 -3
  108. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cycle.py +1 -3
  109. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darija_bench.py +1 -3
  110. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijahellaswag.py +2 -6
  111. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijammlu.py +1 -3
  112. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/dbpedia.py +1 -3
  113. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/discrim_eval.py +1 -3
  114. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/doc.py +1 -3
  115. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +2 -2
  116. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/epec.py +1 -3
  117. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq.py +1 -3
  118. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench.py +1 -3
  119. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_ca.py +1 -3
  120. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_es.py +1 -3
  121. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/esbbq.py +1 -3
  122. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ethics.py +1 -3
  123. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus.py +1 -3
  124. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_exams.py +1 -3
  125. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_proficiency.py +1 -3
  126. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_reading.py +1 -3
  127. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_trivia.py +1 -3
  128. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_llm.py +1 -3
  129. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/financial.py +1 -3
  130. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/flan.py +1 -3
  131. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench.py +1 -3
  132. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench.py +1 -3
  133. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gaokao.py +2 -2
  134. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glianorex.py +1 -3
  135. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_mmlu.py +1 -3
  136. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_piqa.py +1 -3
  137. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpt3.py +1 -3
  138. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/groundcocoa.py +1 -3
  139. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/haerae.py +1 -3
  140. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +2 -2
  141. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +2 -2
  142. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_ethics.py +5 -9
  143. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_math.py +63 -16
  144. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/histoires_morales.py +1 -3
  145. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hrm8k.py +1 -3
  146. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval_infilling.py +1 -3
  147. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/icelandic_winogrande.py +1 -3
  148. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse.py +1 -3
  149. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse_scaling.py +1 -3
  150. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ja.py +1 -3
  151. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard.py +1 -3
  152. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_mc.py +1 -1
  153. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu.py +1 -3
  154. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kobest.py +1 -3
  155. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kormedmcqa.py +5 -17
  156. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_cloze.py +1 -3
  157. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual.py +1 -3
  158. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/law.py +1 -3
  159. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/leaderboard.py +1 -3
  160. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lingoly.py +1 -3
  161. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/llama3.py +1 -3
  162. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lm_syneval.py +1 -3
  163. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +2 -2
  164. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +2 -2
  165. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbench.py +1 -3
  166. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbenchv2.py +1 -3
  167. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mastermind.py +2 -4
  168. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +2 -2
  169. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/med_concepts_qa.py +2 -4
  170. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meddialog.py +1 -3
  171. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medical.py +1 -3
  172. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medmcqa.py +1 -3
  173. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +2 -2
  174. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mela.py +2 -2
  175. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/metabench.py +1 -3
  176. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/minerva_math.py +1 -3
  177. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu.py +1 -3
  178. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlusr.py +3 -4
  179. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +2 -2
  180. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multiblimp.py +2 -5
  181. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +2 -2
  182. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +2 -2
  183. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/non.py +1 -3
  184. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval.py +1 -3
  185. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_exact.py +1 -3
  186. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_gen_exact.py +1 -3
  187. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc.py +4 -8
  188. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc_log_likelihoods.py +4 -8
  189. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/nq_open.py +2 -2
  190. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_arc_multilingual.py +1 -3
  191. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_hellaswag_multilingual.py +1 -3
  192. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_mmlu_multilingual.py +1 -3
  193. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_truthfulqa_multilingual.py +2 -5
  194. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/olaph.py +1 -3
  195. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +2 -2
  196. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/option.py +1 -3
  197. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafraseja.py +1 -3
  198. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafrases.py +1 -3
  199. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws.py +1 -3
  200. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws_x.py +1 -3
  201. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +2 -2
  202. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/persona.py +1 -3
  203. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/phrases.py +1 -3
  204. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pile.py +1 -3
  205. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +2 -2
  206. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench.py +1 -3
  207. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prompt.py +1 -3
  208. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +2 -2
  209. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +2 -2
  210. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +2 -2
  211. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +2 -2
  212. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper_bool.py +2 -2
  213. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +2 -2
  214. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnlieu.py +1 -3
  215. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +2 -2
  216. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +2 -2
  217. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/random.py +1 -3
  218. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +2 -2
  219. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/reversed.py +1 -3
  220. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +2 -2
  221. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ruler.py +1 -3
  222. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +2 -2
  223. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/score.py +1 -3
  224. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls.py +1 -3
  225. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls_mc.py +1 -3
  226. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/self.py +1 -3
  227. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue.py +1 -3
  228. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue_rte.py +2 -1
  229. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/siqa.py +4 -7
  230. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +2 -2
  231. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench.py +1 -3
  232. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/storycloze.py +2 -6
  233. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/summarization.py +1 -3
  234. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super.py +1 -3
  235. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super_glue.py +1 -3
  236. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +2 -2
  237. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swde.py +1 -3
  238. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sycophancy.py +1 -3
  239. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/t0.py +1 -3
  240. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/teca.py +1 -3
  241. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyarc.py +1 -3
  242. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinybenchmarks.py +1 -3
  243. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinygsm8k.py +1 -3
  244. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyhellaswag.py +1 -3
  245. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinymmlu.py +1 -3
  246. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinytruthfulqa.py +1 -3
  247. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinywinogrande.py +1 -3
  248. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tmmluplus.py +1 -3
  249. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +2 -2
  250. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa.py +1 -3
  251. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +1 -3
  252. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +1 -3
  253. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turblimp_core.py +1 -3
  254. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu.py +1 -3
  255. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu_mc.py +0 -2
  256. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/unscramble.py +1 -3
  257. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/vaxx.py +2 -2
  258. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +2 -2
  259. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +3 -4
  260. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +2 -2
  261. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmdp.py +1 -3
  262. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +2 -2
  263. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +2 -2
  264. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc273.py +1 -3
  265. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xcopa.py +1 -3
  266. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xlsum.py +1 -3
  267. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +2 -2
  268. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xquad.py +2 -4
  269. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +2 -3
  270. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +2 -2
  271. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/zhoblimp.py +1 -3
  272. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +173 -6
  273. wisent/core/data_loaders/loaders/lm_loader.py +12 -1
  274. wisent/core/geometry_runner.py +995 -0
  275. wisent/core/geometry_search_space.py +237 -0
  276. wisent/core/hyperparameter_optimizer.py +1 -1
  277. wisent/core/main.py +3 -0
  278. wisent/core/models/core/atoms.py +5 -3
  279. wisent/core/models/wisent_model.py +1 -1
  280. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
  281. wisent/core/parser_arguments/check_linearity_parser.py +12 -2
  282. wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +2 -2
  283. wisent/core/parser_arguments/generate_vector_from_task_parser.py +2 -2
  284. wisent/core/parser_arguments/geometry_search_parser.py +61 -0
  285. wisent/core/parser_arguments/main_parser.py +8 -0
  286. wisent/core/parser_arguments/train_unified_goodness_parser.py +2 -2
  287. wisent/core/steering.py +5 -3
  288. wisent/core/steering_methods/methods/hyperplane.py +2 -1
  289. wisent/core/synthetic/generators/nonsense_generator.py +30 -18
  290. wisent/core/trainers/steering_trainer.py +2 -2
  291. wisent/core/utils/device.py +27 -27
  292. wisent/core/utils/layer_combinations.py +70 -0
  293. wisent/examples/__init__.py +1 -0
  294. wisent/examples/scripts/__init__.py +1 -0
  295. wisent/examples/scripts/count_all_benchmarks.py +121 -0
  296. wisent/examples/scripts/discover_directions.py +469 -0
  297. wisent/examples/scripts/extract_benchmark_info.py +71 -0
  298. wisent/examples/scripts/generate_paper_data.py +384 -0
  299. wisent/examples/scripts/intervention_validation.py +626 -0
  300. wisent/examples/scripts/results/test_AraDiCE_ArabicMMLU_lev_evaluation.json +324 -0
  301. wisent/examples/scripts/results/test_AraDiCE_ArabicMMLU_lev_pairs.json +92 -0
  302. wisent/examples/scripts/results/test_aexams_IslamicStudies_evaluation.json +324 -0
  303. wisent/examples/scripts/results/test_aexams_IslamicStudies_pairs.json +92 -0
  304. wisent/examples/scripts/results/test_afrimgsm_pairs.json +92 -0
  305. wisent/examples/scripts/results/test_afrimmlu_evaluation.json +324 -0
  306. wisent/examples/scripts/results/test_afrimmlu_pairs.json +92 -0
  307. wisent/examples/scripts/search_all_short_names.py +31 -0
  308. wisent/examples/scripts/test_all_benchmarks.py +138 -0
  309. wisent/examples/scripts/test_all_benchmarks_new.py +28 -0
  310. wisent/examples/scripts/test_contrastive_pairs_all_supported.py +230 -0
  311. wisent/examples/scripts/test_nonsense_baseline.py +261 -0
  312. wisent/examples/scripts/test_one_benchmark.py +324 -0
  313. wisent/examples/scripts/test_one_coding_benchmark.py +293 -0
  314. wisent/examples/scripts/threshold_analysis.py +434 -0
  315. wisent/examples/scripts/visualization_gallery.py +582 -0
  316. wisent/parameters/lm_eval/broken_in_lm_eval.json +179 -2
  317. wisent/parameters/lm_eval/category_directions.json +137 -0
  318. wisent/parameters/lm_eval/repair_plan.json +282 -0
  319. wisent/parameters/lm_eval/weak_contrastive_pairs.json +38 -0
  320. wisent/parameters/lm_eval/working_benchmarks.json +206 -0
  321. wisent/parameters/lm_eval/working_benchmarks_categorized.json +236 -0
  322. wisent/tests/test_detector_accuracy.py +1 -1
  323. wisent/tests/visualize_geometry.py +1 -1
  324. {wisent-0.7.701.dist-info → wisent-0.7.901.dist-info}/METADATA +1 -1
  325. {wisent-0.7.701.dist-info → wisent-0.7.901.dist-info}/RECORD +329 -295
  326. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/browsecomp.py +0 -245
  327. {wisent-0.7.701.dist-info → wisent-0.7.901.dist-info}/WHEEL +0 -0
  328. {wisent-0.7.701.dist-info → wisent-0.7.901.dist-info}/entry_points.txt +0 -0
  329. {wisent-0.7.701.dist-info → wisent-0.7.901.dist-info}/licenses/LICENSE +0 -0
  330. {wisent-0.7.701.dist-info → wisent-0.7.901.dist-info}/top_level.txt +0 -0
@@ -33,7 +33,6 @@ STRATEGIES = [
     "chat_mean",
     "chat_first",
     "chat_last",
-    "chat_gen_point",
     "chat_max_norm",
     "chat_weighted",
     "role_play",
@@ -134,9 +133,9 @@ def get_weighted_mean_answer_act(model, tokenizer, text: str, answer: str, layer
     hidden = outputs.hidden_states[layer][0]
     if num_answer_tokens > 0 and num_answer_tokens < hidden.shape[0]:
         answer_hidden = hidden[-num_answer_tokens-1:-1, :]
-        weights = torch.exp(-torch.arange(answer_hidden.shape[0], dtype=torch.float32) * 0.5)
+        weights = torch.exp(-torch.arange(answer_hidden.shape[0], dtype=answer_hidden.dtype, device=answer_hidden.device) * 0.5)
         weights = weights / weights.sum()
-        weighted_mean = (answer_hidden * weights.unsqueeze(1).to(answer_hidden.device)).sum(dim=0)
+        weighted_mean = (answer_hidden * weights.unsqueeze(1)).sum(dim=0)
         return weighted_mean.cpu().float()
     return hidden[-1].cpu().float()

@@ -156,8 +155,6 @@ def get_activation(model, tokenizer, prompt: str, response: str, layer: int, dev
         return get_first_answer_token_act(model, tokenizer, text, response, layer, device)
     elif strategy == "chat_last":
         return get_last_token_act(model, tokenizer, text, layer, device)
-    elif strategy == "chat_gen_point":
-        return get_generation_point_act(model, tokenizer, text, response, layer, device)
     elif strategy == "chat_max_norm":
         return get_max_norm_answer_act(model, tokenizer, text, response, layer, device)
     elif strategy == "chat_weighted":
@@ -348,7 +345,8 @@ def execute_cluster_benchmarks(args):

     logger.info(f"Loading {model}...")
     tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)
-    dtype = torch.bfloat16 if device == 'cuda' else torch.float16
+    from wisent.core.utils.device import device_optimized_dtype
+    dtype = device_optimized_dtype(device)
     llm = AutoModelForCausalLM.from_pretrained(model, torch_dtype=dtype, device_map=device, trust_remote_code=True)

     layers = get_layers_to_test(llm)
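
Note: several call sites in this release replace hard-coded dtype logic with helpers from wisent/core/utils/device.py (+27 -27 above). A minimal sketch of what device_optimized_dtype presumably does, mirroring the inline expression it replaces in this hunk; the shipped helper may pick float32 on CPU or special-case MPS:

import torch

def device_optimized_dtype(device: str) -> torch.dtype:
    # Sketch only: mirrors the old inline expression `bfloat16 if cuda else float16`.
    # The actual implementation in wisent/core/utils/device.py is not shown in this diff.
    return torch.bfloat16 if device == "cuda" else torch.float16
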
@@ -8,6 +8,7 @@ import torch
 from collections import defaultdict

 from wisent.core.errors import SteeringMethodUnknownError, VectorQualityTooLowError
+from wisent.core.utils.device import preferred_dtype


 def execute_create_steering_vector(args):
@@ -46,20 +47,21 @@ def execute_create_steering_vector(args):

     # Structure: {layer_str: {"positive": [tensors], "negative": [tensors]}}
     layer_activations = defaultdict(lambda: {"positive": [], "negative": []})
+    dtype = preferred_dtype()

     for pair in pairs_list:
         # Extract positive activations
         pos_layers = pair['positive_response'].get('layers_activations', {})
         for layer_str, activation_list in pos_layers.items():
             if activation_list is not None:
-                tensor = torch.tensor(activation_list, dtype=torch.float32)
+                tensor = torch.tensor(activation_list, dtype=dtype)
                 layer_activations[layer_str]["positive"].append(tensor)

         # Extract negative activations
         neg_layers = pair['negative_response'].get('layers_activations', {})
         for layer_str, activation_list in neg_layers.items():
             if activation_list is not None:
-                tensor = torch.tensor(activation_list, dtype=torch.float32)
+                tensor = torch.tensor(activation_list, dtype=dtype)
                 layer_activations[layer_str]["negative"].append(tensor)

     available_layers = sorted(layer_activations.keys(), key=int)
@@ -232,7 +234,7 @@ def execute_create_steering_vector(args):
     # If multiple layers, save the first one (or could save all and let user specify)
     if len(steering_vectors) == 1:
         layer_str = list(steering_vectors.keys())[0]
-        vector_tensor = torch.tensor(steering_vectors[layer_str], dtype=torch.float32)
+        vector_tensor = torch.tensor(steering_vectors[layer_str], dtype=dtype)
         torch.save({
             'steering_vector': vector_tensor,
             'layer_index': int(layer_str),
@@ -251,7 +253,7 @@ def execute_create_steering_vector(args):
     # Save multiple layers - save each to separate file
     for layer_str in steering_vectors.keys():
         layer_output = args.output.replace('.pt', f'_layer_{layer_str}.pt')
-        vector_tensor = torch.tensor(steering_vectors[layer_str], dtype=torch.float32)
+        vector_tensor = torch.tensor(steering_vectors[layer_str], dtype=dtype)
         torch.save({
             'steering_vector': vector_tensor,
             'layer_index': int(layer_str),
@@ -6,6 +6,7 @@ import os
 import math

 import torch
+from wisent.core.utils.device import preferred_dtype


 def execute_diagnose_vectors(args):
@@ -227,10 +228,11 @@ def _run_cone_analysis(
         return

     # Convert to tensors if needed
+    dtype = preferred_dtype()
     if not isinstance(pos_acts, torch.Tensor):
-        pos_acts = torch.tensor(pos_acts, dtype=torch.float32)
+        pos_acts = torch.tensor(pos_acts, dtype=dtype)
     if not isinstance(neg_acts, torch.Tensor):
-        neg_acts = torch.tensor(neg_acts, dtype=torch.float32)
+        neg_acts = torch.tensor(neg_acts, dtype=dtype)

     print(f" Positive samples: {pos_acts.shape[0]}")
     print(f" Negative samples: {neg_acts.shape[0]}")
@@ -342,10 +344,11 @@ def _run_geometry_analysis(
         return

     # Convert to tensors
+    dtype = preferred_dtype()
     if not isinstance(pos_acts, torch.Tensor):
-        pos_acts = torch.tensor(pos_acts, dtype=torch.float32)
+        pos_acts = torch.tensor(pos_acts, dtype=dtype)
     if not isinstance(neg_acts, torch.Tensor):
-        neg_acts = torch.tensor(neg_acts, dtype=torch.float32)
+        neg_acts = torch.tensor(neg_acts, dtype=dtype)

     print(f" Positive samples: {pos_acts.shape[0]}")
     print(f" Negative samples: {neg_acts.shape[0]}")
@@ -141,8 +141,10 @@ def estimate_runtime(
     results = {}

     # 1. Model loading (one-time)
-    if device == 'cpu':
-        model_time = TIME_ESTIMATES['model_load_cpu']
+    if device == 'cpu' or device == 'auto':
+        from wisent.core.utils.device import resolve_default_device
+        actual_device = resolve_default_device() if device == 'auto' else device
+        model_time = TIME_ESTIMATES['model_load_cpu'] if actual_device == 'cpu' else TIME_ESTIMATES['model_load_gpu']
     else:
         model_time = TIME_ESTIMATES['model_load_gpu']
     results['model_loading'] = model_time
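
Note: the new 'auto' device option resolves through resolve_default_device(). A hedged sketch of its assumed contract (prefer CUDA, then MPS, then CPU), consistent with the '--device' choices added below; the actual implementation in wisent/core/utils/device.py is not shown in this diff:

import torch

def resolve_default_device() -> str:
    # Assumed detection order; details may differ in the shipped device.py.
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"
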
@@ -269,8 +271,8 @@ def main():
         help="Skip evaluation phase"
     )
     parser.add_argument(
-        "--device", choices=["cuda", "cpu"], default="cuda",
-        help="Device for computation"
+        "--device", choices=["cuda", "cpu", "mps", "auto"], default="auto",
+        help="Device for computation (auto = detect best available)"
     )
     parser.add_argument(
         "--show-breakdown", action="store_true",
@@ -4,8 +4,6 @@ import sys
 import json
 import os

-from wisent.core.errors import InvalidDataFormatError
-

 def execute_generate_pairs_from_task(args):
     """Execute the generate-pairs-from-task command - load and save contrastive pairs from a task."""
@@ -14,9 +12,8 @@ def execute_generate_pairs_from_task(args):
     if hasattr(args, 'task_name') and args.task_name:
         args.task_name = expand_task_if_skill_or_risk(args.task_name)

-    from wisent.core.contrastive_pairs.huggingface_pairs.hf_extractor_manifest import HF_EXTRACTORS
     from wisent.core.contrastive_pairs.lm_eval_pairs.lm_task_pairs_generation import (
-        lm_build_contrastive_pairs,
+        build_contrastive_pairs,
     )

     print(f"\nšŸ“Š Generating contrastive pairs from task: {args.task_name}")
@@ -26,58 +23,14 @@ def execute_generate_pairs_from_task(args):

     try:
         print(f"\nšŸ”„ Loading task '{args.task_name}'...")
-
-        # Check if task is in HuggingFace manifest (doesn't need lm-eval loading)
-        task_name_lower = args.task_name.lower()
-        is_hf_task = task_name_lower in {k.lower() for k in HF_EXTRACTORS.keys()}
-
-        if is_hf_task:
-            # HuggingFace task - skip lm-eval loading, go directly to extractor
-            print(f" Found in HuggingFace manifest, using HF extractor...")
-            print(f" šŸ”Ø Building contrastive pairs...")
-            pairs = lm_build_contrastive_pairs(
-                task_name=args.task_name,
-                lm_eval_task=None,  # HF extractors don't need lm_eval_task
-                limit=args.limit,
-            )
-            pairs_task_name = args.task_name
-        else:
-            # lm-eval task - load via LMEvalDataLoader
-            from wisent.core.data_loaders.loaders.lm_loader import LMEvalDataLoader
-            loader = LMEvalDataLoader()
-            task_obj = loader.load_lm_eval_task(args.task_name)
-
-            # Handle both lm-eval tasks (dict or ConfigurableTask)
-            if isinstance(task_obj, dict):
-                # lm-eval task group with subtasks
-                if len(task_obj) != 1:
-                    keys = ", ".join(sorted(task_obj.keys()))
-                    raise InvalidDataFormatError(
-                        reason=f"Task '{args.task_name}' returned {len(task_obj)} subtasks ({keys}). "
-                        "Specify an explicit subtask, e.g. 'benchmark/subtask'."
-                    )
-                (subname, task), = task_obj.items()
-                pairs_task_name = subname
-
-                # Generate contrastive pairs using lm-eval interface
-                print(f" šŸ”Ø Building contrastive pairs...")
-                pairs = lm_build_contrastive_pairs(
-                    task_name=pairs_task_name,
-                    lm_eval_task=task,
-                    limit=args.limit,
-                )
-            else:
-                # Single lm-eval task (ConfigurableTask), not wrapped in dict
-                task = task_obj
-                pairs_task_name = args.task_name
-
-                # Generate contrastive pairs using lm-eval interface
-                print(f" šŸ”Ø Building contrastive pairs...")
-                pairs = lm_build_contrastive_pairs(
-                    task_name=pairs_task_name,
-                    lm_eval_task=task,
-                    limit=args.limit,
-                )
+        print(f" šŸ”Ø Building contrastive pairs...")
+
+        # Use unified loader - handles HF, lm-eval, and group tasks automatically
+        pairs = build_contrastive_pairs(
+            task_name=args.task_name,
+            limit=args.limit,
+        )
+        pairs_task_name = args.task_name

     print(f" āœ“ Generated {len(pairs)} contrastive pairs")

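Note: the branching removed above is absorbed by the unified loader added to lm_task_pairs_generation.py (+173 -6 in the file list). A usage sketch based only on the call visible in this hunk; the task name and values are illustrative, and any further keyword arguments are not shown in this diff:

from wisent.core.contrastive_pairs.lm_eval_pairs.lm_task_pairs_generation import build_contrastive_pairs

# Per the comment in the diff, the loader dispatches to HF extractors,
# single lm-eval tasks, and task groups internally.
pairs = build_contrastive_pairs(task_name="truthfulqa_mc1", limit=50)
print(f"Generated {len(pairs)} contrastive pairs")
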
@@ -0,0 +1,137 @@
+"""Run geometry search across benchmarks to find unified goodness direction."""
+
+import json
+import sys
+import os
+from pathlib import Path
+
+
+def execute_geometry_search(args):
+    """Execute the geometry-search command."""
+    print(f"\n{'='*60}")
+    print("GEOMETRY SEARCH")
+    print(f"{'='*60}")
+    print(f"Model: {args.model}")
+    print(f"Output: {args.output}")
+    print(f"Pairs per benchmark: {args.pairs_per_benchmark}")
+    print(f"Max layer combo size: {args.max_layer_combo_size}")
+
+    # Import dependencies
+    from wisent.core.models.wisent_model import WisentModel
+    from wisent.core.geometry_search_space import GeometrySearchSpace, GeometrySearchConfig
+    from wisent.core.geometry_runner import GeometryRunner
+    from wisent.core.activations.extraction_strategy import ExtractionStrategy
+
+    # Parse strategies
+    if args.strategies:
+        strategy_names = [s.strip() for s in args.strategies.split(',')]
+        strategies = [ExtractionStrategy(s) for s in strategy_names]
+        print(f"Strategies: {strategy_names}")
+    else:
+        strategies = None  # Use default (all 7)
+        print("Strategies: all 7 default strategies")
+
+    # Parse benchmarks
+    if args.benchmarks:
+        if args.benchmarks.endswith('.txt'):
+            with open(args.benchmarks) as f:
+                benchmarks = [line.strip() for line in f if line.strip()]
+        else:
+            benchmarks = [b.strip() for b in args.benchmarks.split(',')]
+        print(f"Benchmarks: {len(benchmarks)} specified")
+    else:
+        benchmarks = None  # Use default (all)
+        print("Benchmarks: all available")
+
+    # Create config
+    config = GeometrySearchConfig(
+        pairs_per_benchmark=args.pairs_per_benchmark,
+        max_layer_combo_size=args.max_layer_combo_size,
+        random_seed=args.seed,
+        cache_activations=True,
+        cache_dir=args.cache_dir,
+    )
+
+    # Create search space
+    search_space = GeometrySearchSpace(
+        models=[args.model],
+        strategies=strategies,
+        benchmarks=benchmarks,
+        config=config,
+    )
+
+    print(f"\n{search_space.summary()}")
+
+    # Load model
+    print(f"\nLoading model {args.model}...")
+    model = WisentModel(args.model, device=args.device)
+    print(f"Model loaded: {model.num_layers} layers, hidden_size={model.hidden_size}")
+
+    # Create runner
+    cache_dir = args.cache_dir or f"/tmp/wisent_geometry_cache_{args.model.replace('/', '_')}"
+    runner = GeometryRunner(search_space, model, cache_dir=cache_dir)
+
+    # Run search
+    print(f"\nStarting geometry search...")
+    results = runner.run(show_progress=True)
+
+    # Save results
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    results.save(str(output_path))
+    print(f"\nResults saved to: {output_path}")
+
+    # Print summary
+    print(f"\n{'='*60}")
+    print("SUMMARY")
+    print(f"{'='*60}")
+    print(f"Total time: {results.total_time_seconds / 3600:.2f} hours")
+    print(f" Extraction: {results.extraction_time_seconds / 3600:.2f} hours")
+    print(f" Testing: {results.test_time_seconds / 60:.1f} minutes")
+    print(f"Benchmarks tested: {results.benchmarks_tested}")
+    print(f"Strategies tested: {results.strategies_tested}")
+    print(f"Layer combos tested: {results.layer_combos_tested}")
+
+    print(f"\nStructure distribution:")
+    for struct, count in sorted(results.get_structure_distribution().items(), key=lambda x: -x[1]):
+        pct = 100 * count / results.layer_combos_tested
+        print(f" {struct}: {count} ({pct:.1f}%)")
+
+    print(f"\nTop 10 by linear score:")
+    for r in results.get_best_by_linear_score(10):
+        print(f" {r.benchmark}/{r.strategy} layers={r.layers}: linear={r.linear_score:.3f} best={r.best_structure}")
+
+    print(f"\nTop 10 by cone score:")
+    for r in results.get_best_by_structure('cone', 10):
+        print(f" {r.benchmark}/{r.strategy} layers={r.layers}: cone={r.cone_score:.3f} best={r.best_structure}")
+
+    # Summary by benchmark
+    print(f"\nSummary by benchmark (avg linear score):")
+    by_bench = results.get_summary_by_benchmark()
+    sorted_benches = sorted(by_bench.items(), key=lambda x: -x[1]['mean'])[:20]
+    for bench, stats in sorted_benches:
+        print(f" {bench}: mean={stats['mean']:.3f} max={stats['max']:.3f}")
+
+    print(f"\n{'='*60}")
+    print("CONCLUSION")
+    print(f"{'='*60}")
+
+    # Determine if unified direction exists
+    dist = results.get_structure_distribution()
+    total = sum(dist.values())
+    linear_pct = 100 * dist.get('linear', 0) / total if total > 0 else 0
+    cone_pct = 100 * dist.get('cone', 0) / total if total > 0 else 0
+    orthogonal_pct = 100 * dist.get('orthogonal', 0) / total if total > 0 else 0
+
+    if linear_pct > 50:
+        print(f"UNIFIED LINEAR DIRECTION EXISTS ({linear_pct:.1f}% linear)")
+        print("Recommendation: Use CAA with the best layer/strategy combination")
+    elif cone_pct > 30:
+        print(f"CONE STRUCTURE DETECTED ({cone_pct:.1f}% cone)")
+        print("Recommendation: Use PRISM with multi-directional steering")
+    elif orthogonal_pct > 50:
+        print(f"ORTHOGONAL STRUCTURE ({orthogonal_pct:.1f}% orthogonal)")
+        print("Recommendation: No unified direction - use per-benchmark directions or TITAN")
+    else:
+        print("MIXED STRUCTURE - no clear unified direction")
+        print("Recommendation: Use TITAN for adaptive multi-component steering")
@@ -90,7 +90,7 @@ def execute_get_activations(args):

     # 6. Collect activations
     print(f"\n⚔ Collecting activations...")
-    collector = ActivationCollector(model=model, store_device="cpu")
+    collector = ActivationCollector(model=model)

     enriched_pairs = []
     for i, pair in enumerate(pair_set.pairs):
@@ -24,6 +24,7 @@ import torch
 from wisent.core.activations.activations_collector import ActivationCollector
 from wisent.core.activations.extraction_strategy import ExtractionStrategy
 from wisent.core.activations.core.atoms import LayerActivations
+from wisent.core.utils.device import resolve_default_device

 from wisent.core.contrastive_pairs.core.pair import ContrastivePair
 from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
@@ -175,7 +176,7 @@ class MethodOptimizer:
         self,
         model,
         method_name: str,
-        device: str = "cpu",
+        device: str | None = None,
         verbose: bool = True,
     ):
         """
@@ -189,7 +190,7 @@ class MethodOptimizer:
         """
         self.model = model
         self.method_name = method_name.lower()
-        self.device = device
+        self.device = device or resolve_default_device()
         self.verbose = verbose

         # Validate method exists
@@ -250,7 +251,7 @@ class MethodOptimizer:
            "mean_pooling": ExtractionStrategy.CHAT_MEAN,
            "first_token": ExtractionStrategy.CHAT_FIRST,
            "max_pooling": ExtractionStrategy.CHAT_MAX_NORM,
-           "continuation_token": ExtractionStrategy.CHAT_GEN_POINT,
+           "continuation_token": ExtractionStrategy.CHAT_FIRST,  # First answer token
        }

        prompt_strat_map = {
@@ -14,6 +14,7 @@ import time
 from pathlib import Path
 import torch

+from wisent.core.utils.device import resolve_default_device
 from wisent.core.cli_logger import setup_logger, bind
 from wisent.core.models.wisent_model import WisentModel
 from wisent.core.weight_modification import (
@@ -72,7 +73,7 @@ def execute_modify_weights(args):

     if vector_path.suffix == '.pt':
         # Load PyTorch format (from train-unified-goodness or similar)
-        checkpoint = torch.load(args.steering_vectors, map_location='cpu', weights_only=False)
+        checkpoint = torch.load(args.steering_vectors, map_location=resolve_default_device(), weights_only=False)

         # Handle different .pt file formats
         if 'steering_vectors' in checkpoint:
@@ -354,7 +355,7 @@ def execute_modify_weights(args):

     execute_train_unified_goodness(unified_args)

-    checkpoint = torch.load(unified_args.output, map_location='cpu', weights_only=False)
+    checkpoint = torch.load(unified_args.output, map_location=resolve_default_device(), weights_only=False)

     if 'steering_vectors' in checkpoint:
         raw_vectors = checkpoint['steering_vectors']
@@ -87,7 +87,7 @@ def execute_optimize_sample_size(args):
     # Get extraction strategy from args
     extraction_strategy = ExtractionStrategy(getattr(args, 'extraction_strategy', 'chat_last'))

-    collector = ActivationCollector(model=model, store_device="cpu")
+    collector = ActivationCollector(model=model)

     # Collect test activations for all test pairs (ONCE)
     X_test_list = []
@@ -77,7 +77,7 @@ def _run_optuna_search_for_task(

     try:
         # Collect activations
-        collector = ActivationCollector(model=model, store_device="cpu")
+        collector = ActivationCollector(model=model)
         pos_acts = []
         neg_acts = []

@@ -389,7 +389,7 @@ def execute_comprehensive(args, model, loader):
        "first_token": ExtractionStrategy.CHAT_FIRST,
        "max_pooling": ExtractionStrategy.CHAT_MAX_NORM,
        "choice_token": ExtractionStrategy.MC_BALANCED,
-       "continuation_token": ExtractionStrategy.CHAT_GEN_POINT,
+       "continuation_token": ExtractionStrategy.CHAT_FIRST,  # First answer token
     }
     if hasattr(args, 'search_token_aggregations') and args.search_token_aggregations:
         token_agg_names = [x.strip() for x in args.search_token_aggregations.split(',')]
@@ -610,7 +610,7 @@ def execute_comprehensive(args, model, loader):
     layer_str = str(layer)

     # Step 1: Generate steering vector using CAA with current token aggregation
-    collector = ActivationCollector(model=model, store_device="cpu")
+    collector = ActivationCollector(model=model)

     pos_acts = []
     neg_acts = []
@@ -1456,7 +1456,7 @@ def execute_compare_methods(args, model, loader):

     # Collect activations once for all methods
     layer_str = str(args.layer)
-    collector = ActivationCollector(model=model, store_device="cpu")
+    collector = ActivationCollector(model=model)

     print("šŸŽÆ Collecting training activations (ONCE)...")
     pos_acts = []
@@ -1719,7 +1719,7 @@ def execute_optimize_layer(args, model, loader):
         print("Aborted by user.")
         return {"action": "optimize-layer", "status": "aborted", "reason": "user declined reduced search"}

-    collector = ActivationCollector(model=model, store_device="cpu")
+    collector = ActivationCollector(model=model)
     layer_results = {}
     best_layer = None
     best_accuracy = 0.0
@@ -1986,7 +1986,7 @@ def execute_optimize_strength(args, model, loader):

     # Collect activations ONCE
     layer_str = str(args.layer)
-    collector = ActivationCollector(model=model, store_device="cpu")
+    collector = ActivationCollector(model=model)

     print("šŸŽÆ Collecting training activations (ONCE)...")
     pos_acts = []
@@ -2277,7 +2277,7 @@ def execute_auto(args, model, loader):
     print(f" Testing {len(strengths_to_test)} strengths: {strengths_to_test[0]:.2f} to {strengths_to_test[-1]:.2f}")
     print(f" Total configurations: {len(layers_to_test) * len(strengths_to_test)}\n")

-    collector = ActivationCollector(model=model, store_device="cpu")
+    collector = ActivationCollector(model=model)
     all_results = {}
     best_config = None
     best_accuracy = 0.0
@@ -2575,16 +2575,15 @@ def execute_personalization(args, model):
     min_strength, max_strength = args.strength_range
     strengths_to_test = np.linspace(min_strength, max_strength, 7)

-    # Token aggregation strategies to test - ALL 5 strategies
+    # Token aggregation strategies to test
     token_aggregations_to_test = [
         ExtractionStrategy.CHAT_LAST,
         ExtractionStrategy.CHAT_MEAN,
         ExtractionStrategy.CHAT_FIRST,
         ExtractionStrategy.CHAT_MAX_NORM,
-        ExtractionStrategy.CHAT_GEN_POINT,
     ]

-    # Prompt construction strategies to test - ALL 5 strategies
+    # Prompt construction strategies to test
     prompt_constructions_to_test = [
         ExtractionStrategy.CHAT_LAST,
         ExtractionStrategy.CHAT_LAST,
@@ -2655,7 +2654,7 @@ def execute_personalization(args, model):
     print(flush=True)

     # Initialize activation collector
-    collector = ActivationCollector(model=model, store_device="cpu")
+    collector = ActivationCollector(model=model)

     # Track results for all configurations
     all_results = {}
@@ -3108,16 +3107,15 @@ def execute_multi_personalization(args, model):
     min_strength, max_strength = args.strength_range
     strengths_to_test = np.linspace(min_strength, max_strength, 7)

-    # Token aggregation strategies to test - ALL 5 strategies
+    # Token aggregation strategies to test
     token_aggregations_to_test = [
         ExtractionStrategy.CHAT_LAST,
         ExtractionStrategy.CHAT_MEAN,
         ExtractionStrategy.CHAT_FIRST,
         ExtractionStrategy.CHAT_MAX_NORM,
-        ExtractionStrategy.CHAT_GEN_POINT,
     ]

-    # Prompt construction strategies to test - ALL 5 strategies
+    # Prompt construction strategies to test
     prompt_constructions_to_test = [
         ExtractionStrategy.CHAT_LAST,
         ExtractionStrategy.CHAT_LAST,
@@ -3176,7 +3174,7 @@ def execute_multi_personalization(args, model):
     print(f"\nšŸ“ Test prompts: {test_prompts}", flush=True)

     # Initialize collector
-    collector = ActivationCollector(model=model, store_device="cpu")
+    collector = ActivationCollector(model=model)

     # Track results
     all_results = {}
@@ -3565,7 +3563,7 @@ def execute_universal(args, model, loader):
     optimizer = MethodOptimizer(
         model=model,
         method_name=method_name,
-        device=args.device or "cpu",
+        device=args.device if hasattr(args, "device") and args.device else None,
         verbose=args.verbose if hasattr(args, "verbose") else True,
     )

@@ -28,6 +28,7 @@ from dataclasses import dataclass
 from typing import Any, Callable

 import torch
+from wisent.core.utils.device import resolve_default_device


 def upload_to_s3(local_path: str, s3_bucket: str, s3_key: str) -> bool:
@@ -661,7 +662,7 @@ def _generate_steering_vectors(args, num_pairs: int, num_layers: int = None) ->
     execute_train_unified_goodness(vector_args)

     # Load the .pt file
-    checkpoint = torch.load(temp_output_pt, map_location='cpu', weights_only=False)
+    checkpoint = torch.load(temp_output_pt, map_location=resolve_default_device(), weights_only=False)

     # Handle different checkpoint formats
     if 'all_layer_vectors' in checkpoint: