wisent 0.5.14__py3-none-any.whl → 0.5.15__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

This version of wisent might be problematic.

Files changed (60)
  1. wisent/__init__.py +1 -1
  2. wisent/cli.py +114 -0
  3. wisent/core/activations/activations_collector.py +19 -11
  4. wisent/core/cli/__init__.py +3 -1
  5. wisent/core/cli/create_steering_vector.py +60 -18
  6. wisent/core/cli/evaluate_responses.py +14 -8
  7. wisent/core/cli/generate_pairs_from_task.py +18 -5
  8. wisent/core/cli/get_activations.py +1 -1
  9. wisent/core/cli/multi_steer.py +108 -0
  10. wisent/core/cli/optimize_classification.py +187 -285
  11. wisent/core/cli/optimize_sample_size.py +78 -0
  12. wisent/core/cli/optimize_steering.py +354 -53
  13. wisent/core/cli/tasks.py +274 -9
  14. wisent/core/errors/__init__.py +0 -0
  15. wisent/core/errors/error_handler.py +134 -0
  16. wisent/core/evaluators/benchmark_specific/log_likelihoods_evaluator.py +152 -295
  17. wisent/core/evaluators/rotator.py +22 -8
  18. wisent/core/main.py +5 -1
  19. wisent/core/model_persistence.py +4 -19
  20. wisent/core/models/wisent_model.py +11 -3
  21. wisent/core/parser.py +4 -3
  22. wisent/core/parser_arguments/main_parser.py +1 -1
  23. wisent/core/parser_arguments/multi_steer_parser.py +4 -3
  24. wisent/core/parser_arguments/optimize_steering_parser.py +4 -0
  25. wisent/core/sample_size_optimizer_v2.py +1 -1
  26. wisent/core/steering_optimizer.py +2 -2
  27. wisent/tests/__init__.py +0 -0
  28. wisent/tests/examples/__init__.py +0 -0
  29. wisent/tests/examples/cli/__init__.py +0 -0
  30. wisent/tests/examples/cli/activations/__init__.py +0 -0
  31. wisent/tests/examples/cli/activations/test_get_activations.py +127 -0
  32. wisent/tests/examples/cli/classifier/__init__.py +0 -0
  33. wisent/tests/examples/cli/classifier/test_classifier_examples.py +141 -0
  34. wisent/tests/examples/cli/contrastive_pairs/__init__.py +0 -0
  35. wisent/tests/examples/cli/contrastive_pairs/test_generate_pairs.py +89 -0
  36. wisent/tests/examples/cli/evaluation/__init__.py +0 -0
  37. wisent/tests/examples/cli/evaluation/test_evaluation_examples.py +117 -0
  38. wisent/tests/examples/cli/generate/__init__.py +0 -0
  39. wisent/tests/examples/cli/generate/test_generate_with_classifier.py +146 -0
  40. wisent/tests/examples/cli/generate/test_generate_with_steering.py +149 -0
  41. wisent/tests/examples/cli/generate/test_only_generate.py +110 -0
  42. wisent/tests/examples/cli/multi_steering/__init__.py +0 -0
  43. wisent/tests/examples/cli/multi_steering/test_multi_steer_from_trained_vectors.py +210 -0
  44. wisent/tests/examples/cli/multi_steering/test_multi_steer_with_different_parameters.py +205 -0
  45. wisent/tests/examples/cli/multi_steering/test_train_and_multi_steer.py +174 -0
  46. wisent/tests/examples/cli/optimizer/__init__.py +0 -0
  47. wisent/tests/examples/cli/optimizer/test_optimize_sample_size.py +102 -0
  48. wisent/tests/examples/cli/optimizer/test_optimizer_examples.py +59 -0
  49. wisent/tests/examples/cli/steering/__init__.py +0 -0
  50. wisent/tests/examples/cli/steering/test_create_steering_vectors.py +135 -0
  51. wisent/tests/examples/cli/synthetic/__init__.py +0 -0
  52. wisent/tests/examples/cli/synthetic/test_synthetic_pairs.py +45 -0
  53. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/METADATA +3 -1
  54. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/RECORD +59 -29
  55. wisent/core/agent/diagnose/test_synthetic_classifier.py +0 -71
  56. wisent/core/parser_arguments/{test_nonsense_parser.py → nonsense_parser.py} +0 -0
  57. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/WHEEL +0 -0
  58. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/entry_points.txt +0 -0
  59. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/licenses/LICENSE +0 -0
  60. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/top_level.txt +0 -0

wisent/core/cli/optimize_steering.py

@@ -4,6 +4,7 @@ import sys
 import json
 import time
 import numpy as np
+from wisent.core.evaluators.rotator import EvaluatorRotator
 
 def execute_optimize_steering(args):
     """
@@ -40,17 +41,17 @@ def execute_optimize_steering(args):
     # Initialize data loader
     loader = LMEvalDataLoader()
 
-    # Execute based on subcommand
+    # Execute based on subcommand and return results
     if args.steering_action == 'comprehensive':
-        execute_comprehensive(args, model, loader)
+        return execute_comprehensive(args, model, loader)
     elif args.steering_action == 'compare-methods':
-        execute_compare_methods(args, model, loader)
+        return execute_compare_methods(args, model, loader)
     elif args.steering_action == 'optimize-layer':
-        execute_optimize_layer(args, model, loader)
+        return execute_optimize_layer(args, model, loader)
    elif args.steering_action == 'optimize-strength':
-        execute_optimize_strength(args, model, loader)
+        return execute_optimize_strength(args, model, loader)
     elif args.steering_action == 'auto':
-        execute_auto(args, model, loader)
+        return execute_auto(args, model, loader)
     else:
         print(f"\n✗ Unknown steering action: {args.steering_action}")
         sys.exit(1)
@@ -107,17 +108,29 @@ def execute_comprehensive(args, model, loader):
 
             train_pairs = result['train_qa_pairs']
             test_pairs = result['test_qa_pairs']
-
+
             print(f" ✓ Loaded {len(train_pairs.pairs)} train, {len(test_pairs.pairs)} test pairs")
-
+
+            # Initialize evaluator for this task (auto-select based on task_name)
+            EvaluatorRotator.discover_evaluators('wisent.core.evaluators.benchmark_specific')
+            evaluator = EvaluatorRotator(evaluator=None, task_name=task_name)  # None = auto-select
+            print(f" ✓ Using evaluator: {evaluator._evaluator.name} (auto-selected for {task_name})")
+
             print(f"\n 🔍 Testing CAA method across layers, strengths, AND strategies...")
             print(f" Total configurations: {len(layers_to_test)} layers × {len(strengths_to_test)} strengths × {len(strategies_to_test)} strategies = {len(layers_to_test) * len(strengths_to_test) * len(strategies_to_test)}")
-
+
             best_score = 0
             best_config = None
             method_results = {}
             configs_tested = 0
-
+            all_generation_examples = []  # Store generation examples for all configs
+
+            # Prepare test prompts if generating examples for all configs
+            if args.save_all_generation_examples or args.save_generation_examples:
+                num_examples = min(args.num_generation_examples, len(test_pairs.pairs))
+                example_pairs = test_pairs.pairs[:num_examples]
+                print(f" 📝 Will generate {num_examples} example responses per configuration")
+
             for layer in layers_to_test:
                 for strength in strengths_to_test:
                     for strategy in strategies_to_test:
@@ -161,40 +174,108 @@ def execute_comprehensive(args, model, loader):
                             caa_method = CAAMethod(kwargs={"normalize": True})
                             steering_vector = caa_method.train_for_layer(pos_acts, neg_acts)
 
-                            # Step 2: Evaluate with generation (simplified evaluation using activation alignment)
-                            # In production, this would actually generate text and evaluate quality
-                            # For now, we'll use activation alignment as a proxy
+                            # Step 2: Evaluate with ACTUAL GENERATION and task evaluator
+                            # Create steering plan
+                            from wisent.core.models.core.atoms import SteeringVector, SteeringPlan
+                            steering_vec = SteeringVector(vector=steering_vector, scale=strength)
+                            steering_plan = SteeringPlan(
+                                layers={layer_str: steering_vec},
+                                layers_description=[f"CAA steering layer={layer}, strength={strength}, strategy={strategy}"]
+                            )
+
+                            # Apply steering to model
+                            model.apply_steering(steering_plan)
+
                             test_scores = []
-
+
                             for pair in test_pairs.pairs:
-                                updated_pair = collector.collect_for_pair(
-                                    pair,
-                                    layers=[layer_str],
-                                    aggregation=ActivationAggregationStrategy.MEAN_POOLING,
-                                    return_full_sequence=False,
-                                    normalize_layers=False
-                                )
-
-                                if updated_pair.positive_response.layers_activations and layer_str in updated_pair.positive_response.layers_activations:
-                                    pos_act = updated_pair.positive_response.layers_activations[layer_str]
-                                    neg_act = updated_pair.negative_response.layers_activations[layer_str]
-
-                                    if pos_act is not None and neg_act is not None:
-                                        # Apply steering with strategy weighting
-                                        strategy_weight = get_strategy_weight(strategy, position=0.5)  # Mid-position for evaluation
-
-                                        pos_steered = pos_act + (strength * strategy_weight) * steering_vector
-                                        neg_steered = neg_act + (strength * strategy_weight) * steering_vector
-
-                                        # Score: positive should be more aligned with positive direction
-                                        pos_score = torch.dot(pos_steered.flatten(), steering_vector.flatten()).item()
-                                        neg_score = torch.dot(neg_steered.flatten(), steering_vector.flatten()).item()
-
-                                        test_scores.append(1.0 if pos_score > neg_score else 0.0)
+                                try:
+                                    # Prepare choices for multiple choice evaluation
+                                    choices = [pair.negative_response.content, pair.positive_response.content]
+                                    expected = pair.positive_response.content
+
+                                    # Use the Wisent evaluator to check correctness
+                                    # The evaluator will use log likelihood if possible,
+                                    # otherwise fall back to generation
+                                    eval_result = evaluator.evaluate(
+                                        response="",  # Not used for log likelihood eval
+                                        expected=expected,
+                                        model=model,
+                                        question=pair.question,
+                                        choices=choices,
+                                        steering_plan=steering_plan
+                                    )
+
+                                    # Convert TRUTHFUL/UNTRUTHFUL to 1.0/0.0
+                                    is_correct = eval_result.ground_truth == "TRUTHFUL"
+                                    test_scores.append(1.0 if is_correct else 0.0)
+
+                                except Exception as e:
+                                    # NO FALLBACK - raise the error immediately
+                                    print(f"\n❌ Evaluation failed for test pair:")
+                                    print(f" Question: {pair.question[:100]}")
+                                    print(f" Error: {e}")
+                                    raise
+
+                            # Clear steering
+                            model.clear_steering()
 
                             if len(test_scores) > 0:
                                 avg_score = np.mean(test_scores)
-
+
+                                # Generate examples for this configuration if requested
+                                if args.save_all_generation_examples:
+                                    config_examples = []
+                                    for idx, pair in enumerate(example_pairs):
+                                        prompt = pair.question
+                                        try:
+                                            # Generate without steering (only once per prompt, reuse if already generated)
+                                            unsteered_response = model.generate(
+                                                [[{"role": "user", "content": prompt}]],
+                                                max_new_tokens=100,
+                                                temperature=0.7,
+                                                use_steering=False
+                                            )[0]
+
+                                            # Create steering plan for this config
+                                            from wisent.core.models.core.atoms import SteeringVector, SteeringPlan
+                                            steering_vec = SteeringVector(vector=steering_vector, scale=strength)
+                                            steering_plan = SteeringPlan(
+                                                layers={layer_str: steering_vec},
+                                                layers_description=[f"CAA steering layer={layer}, strength={strength}, strategy={strategy}"]
+                                            )
+
+                                            # Generate with steering
+                                            model.apply_steering(steering_plan)
+                                            steered_response = model.generate(
+                                                [[{"role": "user", "content": prompt}]],
+                                                max_new_tokens=100,
+                                                temperature=0.7,
+                                                use_steering=True,
+                                                steering_plan=steering_plan
+                                            )[0]
+                                            model.clear_steering()
+
+                                            config_examples.append({
+                                                'question': prompt,
+                                                'correct_answer': pair.positive_response.content,
+                                                'incorrect_answer': pair.negative_response.content,
+                                                'unsteered_generation': unsteered_response,
+                                                'steered_generation': steered_response
+                                            })
+                                        except Exception as e:
+                                            if args.verbose:
+                                                print(f" ⚠️ Failed to generate example for config layer={layer}, strength={strength}, strategy={strategy}: {e}")
+
+                                    # Store this config's examples
+                                    all_generation_examples.append({
+                                        'layer': layer,
+                                        'strength': strength,
+                                        'strategy': strategy,
+                                        'accuracy': avg_score,
+                                        'examples': config_examples
+                                    })
+
                                 if avg_score > best_score:
                                     best_score = avg_score
                                     best_config = {
@@ -203,14 +284,18 @@ def execute_comprehensive(args, model, loader):
                                         'strategy': strategy,
                                         'accuracy': avg_score
                                     }
-
+
                             if configs_tested % 10 == 0 and args.verbose:
                                 print(f" Tested {configs_tested} configurations...", end='\r')
-
+
                         except Exception as e:
-                            if args.verbose:
-                                print(f" Error at layer={layer}, strength={strength}, strategy={strategy}: {e}")
-                            continue
+                            # NO FALLBACK - raise the error immediately
+                            print(f"\n❌ Configuration test failed:")
+                            print(f" Layer: {layer}")
+                            print(f" Strength: {strength}")
+                            print(f" Strategy: {strategy}")
+                            print(f" Error: {e}")
+                            raise
 
             if best_config:
                 print(f"\n ✅ Best configuration found:")
@@ -219,7 +304,7 @@ def execute_comprehensive(args, model, loader):
                 print(f" Strength: {best_config['strength']}")
                 print(f" Strategy: {best_config['strategy']} ⭐")
                 print(f" Accuracy: {best_config['accuracy']:.3f}")
-
+
                 method_results['CAA'] = {
                     'optimal_layer': best_config['layer'],
                     'optimal_strength': best_config['strength'],
@@ -227,6 +312,179 @@ def execute_comprehensive(args, model, loader):
                     'accuracy': best_config['accuracy'],
                     'f1': best_config['accuracy']
                 }
+
+                # Save best steering vector if requested
+                if args.save_best_vector:
+                    import os
+                    vector_dir = args.save_best_vector
+                    os.makedirs(vector_dir, exist_ok=True)
+
+                    # Recreate the best steering vector
+                    best_layer_str = str(best_config['layer'])
+                    pos_acts_best = []
+                    neg_acts_best = []
+
+                    for pair in train_pairs.pairs:
+                        updated_pair = collector.collect_for_pair(
+                            pair,
+                            layers=[best_layer_str],
+                            aggregation=ActivationAggregationStrategy.MEAN_POOLING,
+                            return_full_sequence=False,
+                            normalize_layers=False
+                        )
+
+                        if updated_pair.positive_response.layers_activations and best_layer_str in updated_pair.positive_response.layers_activations:
+                            act = updated_pair.positive_response.layers_activations[best_layer_str]
+                            if act is not None:
+                                pos_acts_best.append(act)
+
+                        if updated_pair.negative_response.layers_activations and best_layer_str in updated_pair.negative_response.layers_activations:
+                            act = updated_pair.negative_response.layers_activations[best_layer_str]
+                            if act is not None:
+                                neg_acts_best.append(act)
+
+                    # Create and save steering vector
+                    caa_method = CAAMethod(kwargs={"normalize": True})
+                    best_steering_vector = caa_method.train_for_layer(pos_acts_best, neg_acts_best)
+
+                    vector_path = os.path.join(vector_dir, f"{task_name}_layer{best_config['layer']}.pt")
+                    torch.save({
+                        'steering_vector': best_steering_vector,
+                        'vector': best_steering_vector,  # Legacy key
+                        'layer': best_config['layer'],
+                        'layer_index': best_config['layer'],  # Legacy key
+                        'strength': best_config['strength'],
+                        'strategy': best_config['strategy'],
+                        'method': 'CAA',
+                        'task': task_name,
+                        'model': args.model,
+                        'accuracy': best_config['accuracy']
+                    }, vector_path)
+                    print(f" 💾 Saved steering vector to: {vector_path}")
+
+                # Save generation examples
+                if args.save_all_generation_examples:
+                    # Save examples for ALL configurations
+                    examples_path = os.path.join(
+                        args.save_best_vector if args.save_best_vector else "./optimization_results",
+                        f"{task_name}_all_generation_examples.json"
+                    )
+                    os.makedirs(os.path.dirname(examples_path), exist_ok=True)
+
+                    with open(examples_path, 'w') as f:
+                        json.dump({
+                            'task': task_name,
+                            'model': args.model,
+                            'best_config': best_config,
+                            'configurations': all_generation_examples
+                        }, f, indent=2)
+
+                    print(f"\n 💾 Saved generation examples for {len(all_generation_examples)} configurations to: {examples_path}")
+
+                elif args.save_generation_examples:
+                    # Save examples only for the best configuration
+                    print(f"\n 📝 Generating example responses for best configuration...")
+
+                    # Get a few test examples to generate from
+                    num_examples = min(args.num_generation_examples, len(test_pairs.pairs))
+                    example_pairs = test_pairs.pairs[:num_examples]
+
+                    generation_examples = []
+
+                    for idx, pair in enumerate(example_pairs):
+                        # Create prompt from the question
+                        prompt = pair.question
+
+                        try:
+                            # Generate without steering
+                            unsteered_response = model.generate(
+                                [[{"role": "user", "content": prompt}]],
+                                max_new_tokens=100,
+                                temperature=0.7,
+                                use_steering=False
+                            )[0]
+
+                            # Recreate best steering vector for generation
+                            best_layer_str = str(best_config['layer'])
+                            pos_acts_gen = []
+                            neg_acts_gen = []
+
+                            # Collect activations again for steering
+                            for train_pair in train_pairs.pairs[:20]:  # Use subset for speed
+                                updated_pair = collector.collect_for_pair(
+                                    train_pair,
+                                    layers=[best_layer_str],
+                                    aggregation=ActivationAggregationStrategy.MEAN_POOLING,
+                                    return_full_sequence=False,
+                                    normalize_layers=False
+                                )
+
+                                if updated_pair.positive_response.layers_activations and best_layer_str in updated_pair.positive_response.layers_activations:
+                                    act = updated_pair.positive_response.layers_activations[best_layer_str]
+                                    if act is not None:
+                                        pos_acts_gen.append(act)
+
+                                if updated_pair.negative_response.layers_activations and best_layer_str in updated_pair.negative_response.layers_activations:
+                                    act = updated_pair.negative_response.layers_activations[best_layer_str]
+                                    if act is not None:
+                                        neg_acts_gen.append(act)
+
+                            # Create steering vector
+                            caa_method_gen = CAAMethod(kwargs={"normalize": True})
+                            steering_vector_gen = caa_method_gen.train_for_layer(pos_acts_gen, neg_acts_gen)
+
+                            # Create SteeringPlan
+                            from wisent.core.models.core.atoms import SteeringVector, SteeringPlan
+                            steering_vec = SteeringVector(vector=steering_vector_gen, scale=best_config['strength'])
+                            steering_plan = SteeringPlan(
+                                layers={best_layer_str: steering_vec},
+                                layers_description=[f"CAA steering for {task_name}"]
+                            )
+
+                            # Generate with steering
+                            model.attach(steering_plan)
+                            steered_response = model.generate(
+                                [[{"role": "user", "content": prompt}]],
+                                max_new_tokens=100,
+                                temperature=0.7,
+                                use_steering=True,
+                                steering_plan=steering_plan
+                            )[0]
+                            model.detach()
+
+                            generation_examples.append({
+                                'question': prompt,
+                                'correct_answer': pair.positive_response.content,
+                                'incorrect_answer': pair.negative_response.content,
+                                'unsteered_generation': unsteered_response,
+                                'steered_generation': steered_response
+                            })
+
+                            print(f" Generated example {idx+1}/{num_examples}")
+
+                        except Exception as e:
+                            print(f" ⚠️ Failed to generate example {idx+1}: {e}")
+                            if args.verbose:
+                                import traceback
+                                traceback.print_exc()
+
+                    # Save examples to JSON
+                    examples_path = os.path.join(
+                        args.save_best_vector if args.save_best_vector else "./optimization_results",
+                        f"{task_name}_generation_examples.json"
+                    )
+                    os.makedirs(os.path.dirname(examples_path), exist_ok=True)
+
+                    with open(examples_path, 'w') as f:
+                        json.dump({
+                            'task': task_name,
+                            'model': args.model,
+                            'best_config': best_config,
+                            'examples': generation_examples
+                        }, f, indent=2)
+
+                    print(f" 💾 Saved {len(generation_examples)} generation examples to: {examples_path}")
+
             else:
                 print(f"\n ⚠️ No valid configuration found")
                 method_results['CAA'] = {
@@ -247,12 +505,14 @@ def execute_comprehensive(args, model, loader):
 
             task_time = time.time() - task_start_time
             print(f"\n ⏱️ Task completed in {task_time:.1f}s (tested {configs_tested} configurations)")
-
+
         except Exception as e:
-            print(f" ❌ Failed to optimize {task_name}: {e}")
+            # NO FALLBACK - raise the error immediately
+            print(f"\n❌ Task '{task_name}' optimization failed:")
+            print(f" Error: {e}")
             import traceback
             traceback.print_exc()
-            continue
+            raise
 
     # Save results
     print(f"\n{'='*80}")
@@ -283,6 +543,17 @@ def execute_comprehensive(args, model, loader):
         print(f" {task_name:20s} | Method: {config['best_method']:10s} | Layer: {config['best_layer']:2d} | Strength: {config['best_strength']:.2f} | Strategy: {config['best_strategy']:18s}")
     print("-" * 100 + "\n")
 
+    # Return results for programmatic access
+    return {
+        "model": args.model,
+        "action": "comprehensive",
+        "methods_tested": args.methods,
+        "tasks_optimized": list(all_results.keys()),
+        "results": all_results,
+        "results_file": results_file,
+        "optimization_dimensions": ['layer', 'strength', 'strategy']
+    }
+
 
 def get_strategy_weight(strategy: str, position: float) -> float:
     """
@@ -318,7 +589,7 @@ def execute_compare_methods(args, model, loader):
     print(f"🔍 Comparing steering methods for task: {args.task}\n")
     print(f" Methods: {', '.join(args.methods)}")
     print(f" Limit: {args.limit} samples\n")
-
+
     result = loader._load_one_task(
         task_name=args.task,
         split_ratio=0.8,
@@ -327,38 +598,68 @@ def execute_compare_methods(args, model, loader):
         training_limit=None,
         testing_limit=None
     )
-
+
     print(f"✅ Loaded {len(result['train_qa_pairs'].pairs)} train pairs\n")
     print("⚠️ Full method comparison requires implementation of HPR, DAC, BiPO, KSteering")
     print(" Currently only CAA is fully implemented")
 
+    return {
+        "action": "compare-methods",
+        "task": args.task,
+        "methods": args.methods,
+        "status": "not_fully_implemented"
+    }
+
 
 def execute_optimize_layer(args, model, loader):
     """Execute layer optimization."""
     print(f"🎯 Optimizing steering layer for task: {args.task}\n")
     print(f" Method: {args.method}")
     print(f" Strength: {args.strength}\n")
-
+
     print("⚠️ Layer optimization not yet fully implemented")
     print(f" This would optimize layer for {args.method} method")
 
+    return {
+        "action": "optimize-layer",
+        "task": args.task,
+        "method": args.method,
+        "strength": args.strength,
+        "status": "not_fully_implemented"
+    }
+
 
 def execute_optimize_strength(args, model, loader):
     """Execute strength optimization."""
     print(f"💪 Optimizing steering strength for task: {args.task}\n")
     print(f" Method: {args.method}")
     print(f" Strength range: {args.strength_range[0]} to {args.strength_range[1]}\n")
-
+
     print("⚠️ Strength optimization not yet fully implemented")
     print(f" This would optimize strength for {args.method} method")
 
+    return {
+        "action": "optimize-strength",
+        "task": args.task,
+        "method": args.method,
+        "strength_range": args.strength_range,
+        "status": "not_fully_implemented"
+    }
+
 
 def execute_auto(args, model, loader):
     """Execute automatic optimization based on classification config."""
     print(f"🤖 Running automatic steering optimization...\n")
     print(f" Methods: {', '.join(args.methods)}")
     print(f" Strength range: {args.strength_range}\n")
-
+
     print("⚠️ Auto optimization not yet fully implemented")
     print(" This would use classification results to guide steering optimization")
 
+    return {
+        "action": "auto",
+        "methods": args.methods,
+        "strength_range": args.strength_range,
+        "status": "not_fully_implemented"
+    }
+
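
One practical consequence of this release, sketched below for reference: execute_optimize_steering now returns a result dictionary instead of None, so callers can consume the optimization outcome programmatically rather than parsing stdout. The snippet is hypothetical: the top-level key names mirror the return statements in the diff above, while the dictionary values and the exact per-task result shape are illustrative placeholders only.

# Hypothetical sketch (not from the package): consuming the dict returned by
# execute_optimize_steering. Top-level keys mirror the diff's return statements;
# all values below are placeholders, not real optimization output.
results = {
    "model": "<model-id>",
    "action": "comprehensive",
    "methods_tested": ["CAA"],
    "tasks_optimized": ["<task>"],
    "results": {"<task>": {"optimal_layer": 5, "optimal_strength": 1.0, "accuracy": 0.0, "f1": 0.0}},
    "results_file": "<path/to/results.json>",
    "optimization_dimensions": ["layer", "strength", "strategy"],
}

if results.get("status") == "not_fully_implemented":
    # compare-methods / optimize-layer / optimize-strength / auto return a stub marker
    print(f"{results['action']} is not fully implemented yet")
else:
    print("Optimized tasks:", results["tasks_optimized"])
    print("Full results written to:", results["results_file"])
    for task_name, task_result in results["results"].items():
        print(f"  {task_name}: {task_result}")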