wisent 0.5.14__py3-none-any.whl → 0.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wisent might be problematic; see the registry's advisory for this release for details.

Files changed (60):
  1. wisent/__init__.py +1 -1
  2. wisent/cli.py +114 -0
  3. wisent/core/activations/activations_collector.py +19 -11
  4. wisent/core/cli/__init__.py +3 -1
  5. wisent/core/cli/create_steering_vector.py +60 -18
  6. wisent/core/cli/evaluate_responses.py +14 -8
  7. wisent/core/cli/generate_pairs_from_task.py +18 -5
  8. wisent/core/cli/get_activations.py +1 -1
  9. wisent/core/cli/multi_steer.py +108 -0
  10. wisent/core/cli/optimize_classification.py +187 -285
  11. wisent/core/cli/optimize_sample_size.py +78 -0
  12. wisent/core/cli/optimize_steering.py +354 -53
  13. wisent/core/cli/tasks.py +274 -9
  14. wisent/core/errors/__init__.py +0 -0
  15. wisent/core/errors/error_handler.py +134 -0
  16. wisent/core/evaluators/benchmark_specific/log_likelihoods_evaluator.py +152 -295
  17. wisent/core/evaluators/rotator.py +22 -8
  18. wisent/core/main.py +5 -1
  19. wisent/core/model_persistence.py +4 -19
  20. wisent/core/models/wisent_model.py +11 -3
  21. wisent/core/parser.py +4 -3
  22. wisent/core/parser_arguments/main_parser.py +1 -1
  23. wisent/core/parser_arguments/multi_steer_parser.py +4 -3
  24. wisent/core/parser_arguments/optimize_steering_parser.py +4 -0
  25. wisent/core/sample_size_optimizer_v2.py +1 -1
  26. wisent/core/steering_optimizer.py +2 -2
  27. wisent/tests/__init__.py +0 -0
  28. wisent/tests/examples/__init__.py +0 -0
  29. wisent/tests/examples/cli/__init__.py +0 -0
  30. wisent/tests/examples/cli/activations/__init__.py +0 -0
  31. wisent/tests/examples/cli/activations/test_get_activations.py +127 -0
  32. wisent/tests/examples/cli/classifier/__init__.py +0 -0
  33. wisent/tests/examples/cli/classifier/test_classifier_examples.py +141 -0
  34. wisent/tests/examples/cli/contrastive_pairs/__init__.py +0 -0
  35. wisent/tests/examples/cli/contrastive_pairs/test_generate_pairs.py +89 -0
  36. wisent/tests/examples/cli/evaluation/__init__.py +0 -0
  37. wisent/tests/examples/cli/evaluation/test_evaluation_examples.py +117 -0
  38. wisent/tests/examples/cli/generate/__init__.py +0 -0
  39. wisent/tests/examples/cli/generate/test_generate_with_classifier.py +146 -0
  40. wisent/tests/examples/cli/generate/test_generate_with_steering.py +149 -0
  41. wisent/tests/examples/cli/generate/test_only_generate.py +110 -0
  42. wisent/tests/examples/cli/multi_steering/__init__.py +0 -0
  43. wisent/tests/examples/cli/multi_steering/test_multi_steer_from_trained_vectors.py +210 -0
  44. wisent/tests/examples/cli/multi_steering/test_multi_steer_with_different_parameters.py +205 -0
  45. wisent/tests/examples/cli/multi_steering/test_train_and_multi_steer.py +174 -0
  46. wisent/tests/examples/cli/optimizer/__init__.py +0 -0
  47. wisent/tests/examples/cli/optimizer/test_optimize_sample_size.py +102 -0
  48. wisent/tests/examples/cli/optimizer/test_optimizer_examples.py +59 -0
  49. wisent/tests/examples/cli/steering/__init__.py +0 -0
  50. wisent/tests/examples/cli/steering/test_create_steering_vectors.py +135 -0
  51. wisent/tests/examples/cli/synthetic/__init__.py +0 -0
  52. wisent/tests/examples/cli/synthetic/test_synthetic_pairs.py +45 -0
  53. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/METADATA +3 -1
  54. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/RECORD +59 -29
  55. wisent/core/agent/diagnose/test_synthetic_classifier.py +0 -71
  56. /wisent/core/parser_arguments/{test_nonsense_parser.py → nonsense_parser.py} +0 -0
  57. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/WHEEL +0 -0
  58. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/entry_points.txt +0 -0
  59. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/licenses/LICENSE +0 -0
  60. {wisent-0.5.14.dist-info → wisent-0.5.15.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,146 @@
1
+ """
2
+ Test for generate_with_classifier.sh example.
3
+
4
+ This test validates classifier training and inference-only mode
5
+ for real-time monitoring during generation.
6
+ """
7
+
8
+ import subprocess
9
+ import pytest
10
+ import tempfile
11
+ import os
12
+
13
+
def test_classifier_train_only():
    """Test training a classifier and saving it (train-only mode).

    Runs the ``tasks boolq`` command of ``wisent.core.main`` with
    ``--train-only`` inside a temporary directory, then asserts that the
    process exits with status 0 and that the path given to
    ``--save-classifier`` exists afterwards.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child process use the interpreter that is running the tests instead of
    # whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        classifier_path = os.path.join(tmpdir, "classifier.pt")
        output_dir = os.path.join(tmpdir, "training_logs")

        command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--classifier-type", "logistic",
            "--limit", "20",
            "--train-only",
            "--save-classifier", classifier_path,
            "--output", output_dir,
            "--device", "cpu",
            "--verbose",
        ]
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Should complete without error
        assert result.returncode == 0, f"Command failed with: {result.stderr}"

        # Classifier file should be saved
        assert os.path.exists(classifier_path), "Classifier was not saved"
43
+
44
+
def test_classifier_inference_only():
    """Test using a trained classifier during generation (inference-only mode).

    First trains and saves a classifier with ``--train-only``, then runs the
    same ``tasks boolq`` command with ``--inference-only`` and
    ``--load-classifier`` pointing at the saved file. Both runs must exit
    with status 0.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child processes use the interpreter that is running the tests instead
    # of whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        classifier_path = os.path.join(tmpdir, "classifier.pt")
        training_output = os.path.join(tmpdir, "training")
        inference_output = os.path.join(tmpdir, "inference")

        # First train the classifier
        train_command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--classifier-type", "logistic",
            "--limit", "20",
            "--train-only",
            "--save-classifier", classifier_path,
            "--output", training_output,
            "--device", "cpu",
        ]
        train_result = subprocess.run(
            train_command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        assert train_result.returncode == 0, f"Training failed: {train_result.stderr}"
        assert os.path.exists(classifier_path), "Classifier was not saved"

        # Now use it for inference
        inference_command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--limit", "10",
            "--inference-only",
            "--load-classifier", classifier_path,
            "--output", inference_output,
            "--device", "cpu",
            "--verbose",
        ]
        inference_result = subprocess.run(
            inference_command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Should complete without error
        assert inference_result.returncode == 0, f"Inference failed: {inference_result.stderr}"
93
+
94
+
def test_classifier_with_threshold():
    """Test classifier with custom detection threshold.

    Trains and saves a classifier with ``--train-only``, then runs
    inference with ``--load-classifier`` and ``--detection-threshold 0.7``.
    Both runs must exit with status 0.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child processes use the interpreter that is running the tests instead
    # of whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        classifier_path = os.path.join(tmpdir, "classifier.pt")
        training_output = os.path.join(tmpdir, "training")
        inference_output = os.path.join(tmpdir, "inference")

        # Train classifier
        train_command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--classifier-type", "logistic",
            "--limit", "20",
            "--train-only",
            "--save-classifier", classifier_path,
            "--output", training_output,
            "--device", "cpu",
        ]
        train_result = subprocess.run(
            train_command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Report stderr on failure and verify the artifact before inference,
        # so a training problem is not misreported as an inference failure.
        assert train_result.returncode == 0, f"Training failed: {train_result.stderr}"
        assert os.path.exists(classifier_path), "Classifier was not saved"

        # Use with custom threshold
        inference_command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--limit", "10",
            "--inference-only",
            "--load-classifier", classifier_path,
            "--detection-threshold", "0.7",
            "--output", inference_output,
            "--device", "cpu",
            "--verbose",
        ]
        inference_result = subprocess.run(
            inference_command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Should complete without error
        assert inference_result.returncode == 0, f"Inference failed: {inference_result.stderr}"
143
+
144
+
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file as a script reports
    # failure to the calling shell instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
@@ -0,0 +1,149 @@
1
+ """
2
+ Test for generate_with_steering.sh example.
3
+
4
+ This test validates steering vector training and inference-only mode
5
+ for controlling model behavior during generation.
6
+ """
7
+
8
+ import subprocess
9
+ import pytest
10
+ import tempfile
11
+ import os
12
+
13
+
def test_steering_vector_train_only():
    """Test training a steering vector and saving it (train-only mode).

    Runs the ``tasks boolq`` command of ``wisent.core.main`` with
    ``--steering-method CAA`` and ``--train-only`` inside a temporary
    directory, then asserts that the process exits with status 0 and that
    the path given to ``--save-steering-vector`` exists afterwards.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child process use the interpreter that is running the tests instead of
    # whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        vector_path = os.path.join(tmpdir, "vector.pt")
        output_dir = os.path.join(tmpdir, "training_logs")

        command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--steering-method", "CAA",
            "--limit", "20",
            "--train-only",
            "--save-steering-vector", vector_path,
            "--output", output_dir,
            "--device", "cpu",
            "--verbose",
        ]
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Should complete without error
        assert result.returncode == 0, f"Command failed with: {result.stderr}"

        # Vector file should be saved
        assert os.path.exists(vector_path), "Steering vector was not saved"
43
+
44
+
def test_steering_vector_inference_only():
    """Test using a trained steering vector during generation (inference-only mode).

    First trains and saves a CAA steering vector with ``--train-only``, then
    runs the same ``tasks boolq`` command with ``--inference-only``,
    ``--load-steering-vector`` and ``--steering-strength 1.5``. Both runs
    must exit with status 0.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child processes use the interpreter that is running the tests instead
    # of whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        vector_path = os.path.join(tmpdir, "vector.pt")
        training_output = os.path.join(tmpdir, "training")
        inference_output = os.path.join(tmpdir, "inference")

        # First train the vector
        train_command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--steering-method", "CAA",
            "--limit", "20",
            "--train-only",
            "--save-steering-vector", vector_path,
            "--output", training_output,
            "--device", "cpu",
        ]
        train_result = subprocess.run(
            train_command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        assert train_result.returncode == 0, f"Training failed: {train_result.stderr}"
        assert os.path.exists(vector_path), "Vector was not saved"

        # Now use it for inference
        inference_command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--steering-method", "CAA",
            "--steering-strength", "1.5",
            "--limit", "10",
            "--inference-only",
            "--load-steering-vector", vector_path,
            "--output", inference_output,
            "--device", "cpu",
            "--verbose",
        ]
        inference_result = subprocess.run(
            inference_command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Should complete without error
        assert inference_result.returncode == 0, f"Inference failed: {inference_result.stderr}"
95
+
96
+
def test_steering_with_caa_l2():
    """Test steering with CAA method using strong steering strength.

    Trains and saves a steering vector with ``--steering-method CAA``, then
    runs inference with the saved vector and ``--steering-strength 2.0``.
    Both runs must exit with status 0.

    NOTE(review): despite the ``caa_l2`` suffix in the test name, both
    commands pass ``CAA`` as the steering method.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child processes use the interpreter that is running the tests instead
    # of whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        vector_path = os.path.join(tmpdir, "vector.pt")
        training_output = os.path.join(tmpdir, "training")
        inference_output = os.path.join(tmpdir, "inference")

        # Train with CAA
        train_command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--steering-method", "CAA",
            "--limit", "20",
            "--train-only",
            "--save-steering-vector", vector_path,
            "--output", training_output,
            "--device", "cpu",
        ]
        train_result = subprocess.run(
            train_command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Report stderr on failure and verify the artifact before inference,
        # so a training problem is not misreported as an inference failure.
        assert train_result.returncode == 0, f"Training failed: {train_result.stderr}"
        assert os.path.exists(vector_path), "Vector was not saved"

        # Use with stronger steering
        inference_command = [
            sys.executable, "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--steering-method", "CAA",
            "--steering-strength", "2.0",
            "--limit", "10",
            "--inference-only",
            "--load-steering-vector", vector_path,
            "--output", inference_output,
            "--device", "cpu",
            "--verbose",
        ]
        inference_result = subprocess.run(
            inference_command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Should complete without error
        assert inference_result.returncode == 0, f"Inference failed: {inference_result.stderr}"
146
+
147
+
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file as a script reports
    # failure to the calling shell instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
@@ -0,0 +1,110 @@
1
+ """
2
+ Test for only_generate.sh example.
3
+
4
+ This test validates that the generate-responses command works correctly
5
+ for basic response generation without steering or classification.
6
+ """
7
+
8
+ import subprocess
9
+ import pytest
10
+ import tempfile
11
+ import os
12
+ import json
13
+
14
+
def test_generate_basic():
    """Test basic response generation from a task.

    Runs the ``generate-responses`` command of ``wisent.core.main`` for the
    boolq task and asserts that the process exits with status 0, that the
    ``--output`` file exists, and that it parses as a JSON list or object.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child process use the interpreter that is running the tests instead of
    # whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = os.path.join(tmpdir, "responses.json")

        command = [
            sys.executable, "-m", "wisent.core.main", "generate-responses",
            "meta-llama/Llama-3.2-1B-Instruct",
            "--task", "boolq",
            "--num-questions", "3",
            "--max-new-tokens", "50",
            "--temperature", "0.7",
            "--top-p", "0.95",
            "--output", output_file,
            "--device", "cpu",
            "--verbose",
        ]
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=180,
        )

        # Should complete without error
        assert result.returncode == 0, f"Command failed with: {result.stderr}"

        # Output file should exist
        assert os.path.exists(output_file), "Output file was not created"

        # Output file should contain valid JSON. Decode explicitly as UTF-8
        # (the JSON interchange encoding) rather than the platform default.
        with open(output_file, "r", encoding="utf-8") as f:
            data = json.load(f)
        assert isinstance(data, (list, dict)), "Output should be JSON"
48
+
49
+
def test_generate_deterministic():
    """Test deterministic generation with temperature=0.

    Runs ``generate-responses`` with ``--temperature 0.0`` and asserts that
    the process exits with status 0 and that the ``--output`` file exists.
    The generated content itself is not inspected.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child process use the interpreter that is running the tests instead of
    # whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = os.path.join(tmpdir, "deterministic.json")

        command = [
            sys.executable, "-m", "wisent.core.main", "generate-responses",
            "meta-llama/Llama-3.2-1B-Instruct",
            "--task", "boolq",
            "--num-questions", "2",
            "--max-new-tokens", "30",
            "--temperature", "0.0",
            "--output", output_file,
            "--device", "cpu",
            "--verbose",
        ]
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=180,
        )

        # Should complete without error
        assert result.returncode == 0, f"Command failed with: {result.stderr}"

        # Output file should exist
        assert os.path.exists(output_file), "Output file was not created"
77
+
78
+
def test_generate_creative():
    """Test creative generation with higher temperature.

    Runs ``generate-responses`` with ``--temperature 1.0`` and
    ``--top-p 0.9`` and asserts that the process exits with status 0 and
    that the ``--output`` file exists. The generated content itself is not
    inspected.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child process use the interpreter that is running the tests instead of
    # whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = os.path.join(tmpdir, "creative.json")

        command = [
            sys.executable, "-m", "wisent.core.main", "generate-responses",
            "meta-llama/Llama-3.2-1B-Instruct",
            "--task", "boolq",
            "--num-questions", "2",
            "--max-new-tokens", "40",
            "--temperature", "1.0",
            "--top-p", "0.9",
            "--output", output_file,
            "--device", "cpu",
            "--verbose",
        ]
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=180,
        )

        # Should complete without error
        assert result.returncode == 0, f"Command failed with: {result.stderr}"

        # Output file should exist
        assert os.path.exists(output_file), "Output file was not created"
107
+
108
+
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file as a script reports
    # failure to the calling shell instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
File without changes
@@ -0,0 +1,210 @@
1
+ """
2
+ Test for multi_steer_from_trained_vectors.sh example.
3
+
4
+ This test validates practical use cases of combining pre-trained vectors
5
+ for different personas and scenarios (tech doc writer, teacher, etc).
6
+ """
7
+
8
+ import subprocess
9
+ import pytest
10
+ import tempfile
11
+ import os
12
+
13
+
def create_named_vector(tmpdir, name, trait_label):
    """Helper to create a named test vector.

    Runs the ``generate-vector-from-task`` command of ``wisent.core.main``
    for the boolq task and asserts that it exits with status 0.

    Args:
        tmpdir: Directory in which the vector file is created.
        name: Base name of the vector file; the path is ``<tmpdir>/<name>.pt``.
        trait_label: Value passed to the command's ``--trait-label`` option.

    Returns:
        The path passed to the command's ``--output`` option.

    Raises:
        AssertionError: If the command exits with a non-zero status.
        subprocess.TimeoutExpired: If the command runs longer than 300 seconds.
    """
    # Local import keeps this helper self-contained. sys.executable makes the
    # child process use the interpreter that is running the tests instead of
    # whatever "python" happens to resolve to on PATH.
    import sys

    vector_path = os.path.join(tmpdir, f"{name}.pt")

    command = [
        sys.executable, "-m", "wisent.core.main", "generate-vector-from-task",
        "--task", "boolq",
        "--trait-label", trait_label,
        "--output", vector_path,
        "--model", "meta-llama/Llama-3.2-1B-Instruct",
        "--num-pairs", "10",
        "--layers", "3",
        "--token-aggregation", "average",
        "--method", "caa",
        "--normalize",
        "--device", "cpu",
    ]
    result = subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=300,
    )

    assert result.returncode == 0, f"Failed to create {name} vector: {result.stderr}"
    return vector_path
39
+
40
+
def test_technical_documentation_persona():
    """Test combining vectors for technical documentation writer persona.

    Creates a "formal" and a "technical" vector, passes them to the
    ``multi-steer`` command with weights 0.5 and 0.5, and asserts that the
    command exits with status 0 and that the ``--save-combined`` file exists.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child process use the interpreter that is running the tests instead of
    # whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        formal_vector = create_named_vector(tmpdir, "formal", "formal_tone")
        technical_vector = create_named_vector(tmpdir, "technical", "technical")
        combined_path = os.path.join(tmpdir, "tech_doc.pt")

        command = [
            sys.executable, "-m", "wisent.core.main", "multi-steer",
            "--vector", f"{formal_vector}:0.5",
            "--vector", f"{technical_vector}:0.5",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--method", "CAA",
            "--prompt", "Explain REST APIs.",
            "--max-new-tokens", "50",
            "--normalize-weights",
            "--save-combined", combined_path,
            "--device", "cpu",
            "--verbose",
        ]
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=180,
        )

        assert result.returncode == 0, f"Command failed: {result.stderr}"
        assert os.path.exists(combined_path), "Combined vector not saved"
70
+
71
+
def test_friendly_teacher_persona():
    """Test combining vectors for friendly teacher persona.

    Creates a "friendly" and a "detailed" vector, passes them to the
    ``multi-steer`` command with weights 0.6 and 0.4, and asserts that the
    command exits with status 0 and that the ``--save-combined`` file exists.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child process use the interpreter that is running the tests instead of
    # whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        friendly_vector = create_named_vector(tmpdir, "friendly", "friendly")
        detailed_vector = create_named_vector(tmpdir, "detailed", "detailed")
        combined_path = os.path.join(tmpdir, "teacher.pt")

        command = [
            sys.executable, "-m", "wisent.core.main", "multi-steer",
            "--vector", f"{friendly_vector}:0.6",
            "--vector", f"{detailed_vector}:0.4",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--method", "CAA",
            "--prompt", "How does photosynthesis work?",
            "--max-new-tokens", "50",
            "--normalize-weights",
            "--save-combined", combined_path,
            "--device", "cpu",
            "--verbose",
        ]
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=180,
        )

        assert result.returncode == 0, f"Command failed: {result.stderr}"
        assert os.path.exists(combined_path), "Combined vector not saved"
101
+
102
+
def test_executive_summary_persona():
    """Test combining vectors for executive summary writer.

    Creates a "concise" and a "formal" vector, passes them to the
    ``multi-steer`` command with weights 0.6 and 0.4, and asserts that the
    command exits with status 0 and that the ``--save-combined`` file exists.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child process use the interpreter that is running the tests instead of
    # whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        concise_vector = create_named_vector(tmpdir, "concise", "concise")
        formal_vector = create_named_vector(tmpdir, "formal", "formal")
        combined_path = os.path.join(tmpdir, "executive.pt")

        command = [
            sys.executable, "-m", "wisent.core.main", "multi-steer",
            "--vector", f"{concise_vector}:0.6",
            "--vector", f"{formal_vector}:0.4",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--method", "CAA",
            "--prompt", "Benefits of cloud computing.",
            "--max-new-tokens", "50",
            "--normalize-weights",
            "--save-combined", combined_path,
            "--device", "cpu",
            "--verbose",
        ]
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=180,
        )

        assert result.returncode == 0, f"Command failed: {result.stderr}"
        assert os.path.exists(combined_path), "Combined vector not saved"
132
+
133
+
def test_comparing_weight_ratios():
    """Test comparing different weight ratios for the same prompt.

    Creates two vectors and runs the ``multi-steer`` command three times on
    the same prompt, varying only the per-vector weights (0.7/0.3, 0.3/0.7
    and 0.5/0.5). Each run must exit with status 0; the generated text is
    not compared between runs.
    """
    # Local import keeps this test self-contained. sys.executable makes the
    # child processes use the interpreter that is running the tests instead
    # of whatever "python" happens to resolve to on PATH.
    import sys

    with tempfile.TemporaryDirectory() as tmpdir:
        vector1 = create_named_vector(tmpdir, "v1", "trait1")
        vector2 = create_named_vector(tmpdir, "v2", "trait2")

        prompt = "Explain machine learning."

        # (label, weight for vector1, weight for vector2); run in this order.
        configurations = (
            ("A", "0.7", "0.3"),  # more weight on vector1
            ("B", "0.3", "0.7"),  # more weight on vector2
            ("C", "0.5", "0.5"),  # balanced
        )

        for label, weight1, weight2 in configurations:
            command = [
                sys.executable, "-m", "wisent.core.main", "multi-steer",
                "--vector", f"{vector1}:{weight1}",
                "--vector", f"{vector2}:{weight2}",
                "--model", "meta-llama/Llama-3.2-1B-Instruct",
                "--layer", "3",
                "--method", "CAA",
                "--prompt", prompt,
                "--max-new-tokens", "50",
                "--normalize-weights",
                "--device", "cpu",
                "--verbose",
            ]
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=180,
            )

            # Stop at the first failing configuration, naming it in the message.
            assert result.returncode == 0, f"Config {label} failed: {result.stderr}"
207
+
208
+
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file as a script reports
    # failure to the calling shell instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))