wisent 0.5.13__py3-none-any.whl → 0.5.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic. Click here for more details.
- wisent/__init__.py +1 -1
- wisent/cli.py +114 -0
- wisent/core/activations/activations_collector.py +19 -11
- wisent/core/agent/__init__.py +1 -18
- wisent/core/agent/diagnose/__init__.py +1 -55
- wisent/core/cli/__init__.py +3 -1
- wisent/core/cli/create_steering_vector.py +60 -18
- wisent/core/cli/evaluate_responses.py +14 -8
- wisent/core/cli/generate_pairs_from_task.py +18 -5
- wisent/core/cli/get_activations.py +1 -1
- wisent/core/cli/multi_steer.py +108 -0
- wisent/core/cli/optimize_classification.py +187 -285
- wisent/core/cli/optimize_sample_size.py +78 -0
- wisent/core/cli/optimize_steering.py +354 -53
- wisent/core/cli/tasks.py +274 -9
- wisent/core/errors/__init__.py +0 -0
- wisent/core/errors/error_handler.py +134 -0
- wisent/core/evaluators/benchmark_specific/log_likelihoods_evaluator.py +152 -295
- wisent/core/evaluators/rotator.py +22 -8
- wisent/core/main.py +5 -1
- wisent/core/model_persistence.py +4 -19
- wisent/core/models/wisent_model.py +11 -3
- wisent/core/parser.py +4 -3
- wisent/core/parser_arguments/main_parser.py +1 -1
- wisent/core/parser_arguments/multi_steer_parser.py +4 -3
- wisent/core/parser_arguments/optimize_steering_parser.py +4 -0
- wisent/core/sample_size_optimizer_v2.py +1 -1
- wisent/core/steering_optimizer.py +2 -2
- wisent/tests/__init__.py +0 -0
- wisent/tests/examples/__init__.py +0 -0
- wisent/tests/examples/cli/__init__.py +0 -0
- wisent/tests/examples/cli/activations/__init__.py +0 -0
- wisent/tests/examples/cli/activations/test_get_activations.py +127 -0
- wisent/tests/examples/cli/classifier/__init__.py +0 -0
- wisent/tests/examples/cli/classifier/test_classifier_examples.py +141 -0
- wisent/tests/examples/cli/contrastive_pairs/__init__.py +0 -0
- wisent/tests/examples/cli/contrastive_pairs/test_generate_pairs.py +89 -0
- wisent/tests/examples/cli/evaluation/__init__.py +0 -0
- wisent/tests/examples/cli/evaluation/test_evaluation_examples.py +117 -0
- wisent/tests/examples/cli/generate/__init__.py +0 -0
- wisent/tests/examples/cli/generate/test_generate_with_classifier.py +146 -0
- wisent/tests/examples/cli/generate/test_generate_with_steering.py +149 -0
- wisent/tests/examples/cli/generate/test_only_generate.py +110 -0
- wisent/tests/examples/cli/multi_steering/__init__.py +0 -0
- wisent/tests/examples/cli/multi_steering/test_multi_steer_from_trained_vectors.py +210 -0
- wisent/tests/examples/cli/multi_steering/test_multi_steer_with_different_parameters.py +205 -0
- wisent/tests/examples/cli/multi_steering/test_train_and_multi_steer.py +174 -0
- wisent/tests/examples/cli/optimizer/__init__.py +0 -0
- wisent/tests/examples/cli/optimizer/test_optimize_sample_size.py +102 -0
- wisent/tests/examples/cli/optimizer/test_optimizer_examples.py +59 -0
- wisent/tests/examples/cli/steering/__init__.py +0 -0
- wisent/tests/examples/cli/steering/test_create_steering_vectors.py +135 -0
- wisent/tests/examples/cli/synthetic/__init__.py +0 -0
- wisent/tests/examples/cli/synthetic/test_synthetic_pairs.py +45 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/METADATA +3 -1
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/RECORD +61 -31
- wisent/core/agent/diagnose/test_synthetic_classifier.py +0 -71
- /wisent/core/parser_arguments/{test_nonsense_parser.py → nonsense_parser.py} +0 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/WHEEL +0 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/entry_points.txt +0 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/licenses/LICENSE +0 -0
- {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test for generate_with_classifier.sh example.
|
|
3
|
+
|
|
4
|
+
This test validates classifier training and inference-only mode
|
|
5
|
+
for real-time monitoring during generation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import subprocess
|
|
9
|
+
import pytest
|
|
10
|
+
import tempfile
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_classifier_train_only():
    """Train a logistic classifier on boolq in train-only mode and check it is saved.

    The ``tasks`` CLI command is run in a subprocess on CPU; the test expects
    a zero exit code and a file at the path given to ``--save-classifier``.
    """
    with tempfile.TemporaryDirectory() as workdir:
        saved_classifier = os.path.join(workdir, "classifier.pt")
        logs_dir = os.path.join(workdir, "training_logs")

        command = [
            "python", "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--classifier-type", "logistic",
            "--limit", "20",
            "--train-only",
            "--save-classifier", saved_classifier,
            "--output", logs_dir,
            "--device", "cpu",
            "--verbose",
        ]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=300
        )

        # A non-zero exit status means the CLI run failed; show its stderr.
        assert completed.returncode == 0, f"Command failed with: {completed.stderr}"

        # The run must have written the classifier to the requested path.
        assert os.path.exists(saved_classifier), "Classifier was not saved"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_classifier_inference_only():
    """Train a classifier, then load it in a second run using inference-only mode.

    Both steps invoke the ``tasks`` CLI command in a subprocess on CPU. The
    training step must exit cleanly and leave a classifier file behind; the
    inference step, given that file via ``--load-classifier``, must also exit
    with status zero.
    """
    with tempfile.TemporaryDirectory() as workdir:
        saved_classifier = os.path.join(workdir, "classifier.pt")
        train_dir = os.path.join(workdir, "training")
        infer_dir = os.path.join(workdir, "inference")

        # Step 1: produce the classifier file that step 2 will load.
        train_cmd = [
            "python", "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--classifier-type", "logistic",
            "--limit", "20",
            "--train-only",
            "--save-classifier", saved_classifier,
            "--output", train_dir,
            "--device", "cpu",
        ]
        trained = subprocess.run(
            train_cmd, capture_output=True, text=True, timeout=300
        )

        assert trained.returncode == 0, f"Training failed: {trained.stderr}"
        assert os.path.exists(saved_classifier), "Classifier was not saved"

        # Step 2: run again with the saved classifier and no training.
        infer_cmd = [
            "python", "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--limit", "10",
            "--inference-only",
            "--load-classifier", saved_classifier,
            "--output", infer_dir,
            "--device", "cpu",
            "--verbose",
        ]
        inferred = subprocess.run(
            infer_cmd, capture_output=True, text=True, timeout=300
        )

        # The inference run must finish with a zero exit status.
        assert inferred.returncode == 0, f"Inference failed: {inferred.stderr}"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_classifier_with_threshold():
    """Test inference with a saved classifier and a custom detection threshold.

    A classifier is first trained and saved with the ``tasks`` CLI command,
    then loaded in inference-only mode with ``--detection-threshold 0.7``.
    Both subprocess runs must exit with status zero.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        classifier_path = os.path.join(tmpdir, "classifier.pt")
        training_output = os.path.join(tmpdir, "training")
        inference_output = os.path.join(tmpdir, "inference")

        # Train classifier
        train_result = subprocess.run(
            [
                "python", "-m", "wisent.core.main", "tasks", "boolq",
                "--model", "meta-llama/Llama-3.2-1B-Instruct",
                "--layer", "3",
                "--classifier-type", "logistic",
                "--limit", "20",
                "--train-only",
                "--save-classifier", classifier_path,
                "--output", training_output,
                "--device", "cpu",
            ],
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Report stderr on failure and verify the artifact exists, so a broken
        # training step is not misreported later as an inference failure.
        assert train_result.returncode == 0, f"Training failed: {train_result.stderr}"
        assert os.path.exists(classifier_path), "Classifier was not saved"

        # Use with custom threshold
        inference_result = subprocess.run(
            [
                "python", "-m", "wisent.core.main", "tasks", "boolq",
                "--model", "meta-llama/Llama-3.2-1B-Instruct",
                "--layer", "3",
                "--limit", "10",
                "--inference-only",
                "--load-classifier", classifier_path,
                "--detection-threshold", "0.7",
                "--output", inference_output,
                "--device", "cpu",
                "--verbose",
            ],
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Should complete without error
        assert inference_result.returncode == 0, f"Inference failed: {inference_result.stderr}"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
if __name__ == "__main__":
    # Direct execution: hand this file to pytest with verbose output.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test for generate_with_steering.sh example.
|
|
3
|
+
|
|
4
|
+
This test validates steering vector training and inference-only mode
|
|
5
|
+
for controlling model behavior during generation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import subprocess
|
|
9
|
+
import pytest
|
|
10
|
+
import tempfile
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_steering_vector_train_only():
    """Train a CAA steering vector on boolq in train-only mode and check it is saved.

    The ``tasks`` CLI command is run in a subprocess on CPU; the test expects
    a zero exit code and a file at the path given to ``--save-steering-vector``.
    """
    with tempfile.TemporaryDirectory() as workdir:
        saved_vector = os.path.join(workdir, "vector.pt")
        logs_dir = os.path.join(workdir, "training_logs")

        command = [
            "python", "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--steering-method", "CAA",
            "--limit", "20",
            "--train-only",
            "--save-steering-vector", saved_vector,
            "--output", logs_dir,
            "--device", "cpu",
            "--verbose",
        ]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=300
        )

        # A non-zero exit status means the CLI run failed; show its stderr.
        assert completed.returncode == 0, f"Command failed with: {completed.stderr}"

        # The run must have written the vector to the requested path.
        assert os.path.exists(saved_vector), "Steering vector was not saved"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_steering_vector_inference_only():
    """Train a steering vector, then load it in a second inference-only run.

    Both steps invoke the ``tasks`` CLI command in a subprocess on CPU. The
    training step must exit cleanly and leave a vector file behind; the
    inference step loads it via ``--load-steering-vector`` with a steering
    strength of 1.5 and must also exit with status zero.
    """
    with tempfile.TemporaryDirectory() as workdir:
        saved_vector = os.path.join(workdir, "vector.pt")
        train_dir = os.path.join(workdir, "training")
        infer_dir = os.path.join(workdir, "inference")

        # Step 1: produce the vector file that step 2 will load.
        train_cmd = [
            "python", "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--steering-method", "CAA",
            "--limit", "20",
            "--train-only",
            "--save-steering-vector", saved_vector,
            "--output", train_dir,
            "--device", "cpu",
        ]
        trained = subprocess.run(
            train_cmd, capture_output=True, text=True, timeout=300
        )

        assert trained.returncode == 0, f"Training failed: {trained.stderr}"
        assert os.path.exists(saved_vector), "Vector was not saved"

        # Step 2: run again with the saved vector and no training.
        infer_cmd = [
            "python", "-m", "wisent.core.main", "tasks", "boolq",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--steering-method", "CAA",
            "--steering-strength", "1.5",
            "--limit", "10",
            "--inference-only",
            "--load-steering-vector", saved_vector,
            "--output", infer_dir,
            "--device", "cpu",
            "--verbose",
        ]
        inferred = subprocess.run(
            infer_cmd, capture_output=True, text=True, timeout=300
        )

        # The inference run must finish with a zero exit status.
        assert inferred.returncode == 0, f"Inference failed: {inferred.stderr}"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_steering_with_caa_l2():
    """Test steering with the CAA method using a stronger steering strength.

    A CAA steering vector is first trained and saved with the ``tasks`` CLI
    command, then loaded in inference-only mode with
    ``--steering-strength 2.0``. Both subprocess runs must exit with status
    zero.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        vector_path = os.path.join(tmpdir, "vector.pt")
        training_output = os.path.join(tmpdir, "training")
        inference_output = os.path.join(tmpdir, "inference")

        # Train with CAA
        train_result = subprocess.run(
            [
                "python", "-m", "wisent.core.main", "tasks", "boolq",
                "--model", "meta-llama/Llama-3.2-1B-Instruct",
                "--layer", "3",
                "--steering-method", "CAA",
                "--limit", "20",
                "--train-only",
                "--save-steering-vector", vector_path,
                "--output", training_output,
                "--device", "cpu",
            ],
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Report stderr on failure and verify the artifact exists, so a broken
        # training step is not misreported later as an inference failure.
        assert train_result.returncode == 0, f"Training failed: {train_result.stderr}"
        assert os.path.exists(vector_path), "Vector was not saved"

        # Use with stronger steering
        inference_result = subprocess.run(
            [
                "python", "-m", "wisent.core.main", "tasks", "boolq",
                "--model", "meta-llama/Llama-3.2-1B-Instruct",
                "--layer", "3",
                "--steering-method", "CAA",
                "--steering-strength", "2.0",
                "--limit", "10",
                "--inference-only",
                "--load-steering-vector", vector_path,
                "--output", inference_output,
                "--device", "cpu",
                "--verbose",
            ],
            capture_output=True,
            text=True,
            timeout=300,
        )

        # Should complete without error
        assert inference_result.returncode == 0, f"Inference failed: {inference_result.stderr}"
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
if __name__ == "__main__":
    # Direct execution: hand this file to pytest with verbose output.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test for only_generate.sh example.
|
|
3
|
+
|
|
4
|
+
This test validates that the generate-responses command works correctly
|
|
5
|
+
for basic response generation without steering or classification.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import subprocess
|
|
9
|
+
import pytest
|
|
10
|
+
import tempfile
|
|
11
|
+
import os
|
|
12
|
+
import json
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_generate_basic():
    """Run ``generate-responses`` on boolq and check the output file holds JSON.

    The command is executed in a subprocess on CPU with temperature 0.7 and
    top-p 0.95. The test expects a zero exit code, an output file at the
    requested path, and a top-level JSON list or object inside it.
    """
    with tempfile.TemporaryDirectory() as workdir:
        responses_path = os.path.join(workdir, "responses.json")

        command = [
            "python", "-m", "wisent.core.main", "generate-responses",
            "meta-llama/Llama-3.2-1B-Instruct",
            "--task", "boolq",
            "--num-questions", "3",
            "--max-new-tokens", "50",
            "--temperature", "0.7",
            "--top-p", "0.95",
            "--output", responses_path,
            "--device", "cpu",
            "--verbose",
        ]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=180
        )

        # A non-zero exit status means the CLI run failed; show its stderr.
        assert completed.returncode == 0, f"Command failed with: {completed.stderr}"

        # The run must have produced the requested file.
        assert os.path.exists(responses_path), "Output file was not created"

        # json.load raises on malformed content, so parsing is itself a check.
        with open(responses_path, "r") as handle:
            payload = json.load(handle)
        assert isinstance(payload, (list, dict)), "Output should be JSON"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_generate_deterministic():
    """Run ``generate-responses`` with ``--temperature 0.0`` and check for output.

    The command is executed in a subprocess on CPU. The test only checks the
    exit code and that the output file exists; it does not compare repeated
    runs against each other.
    """
    with tempfile.TemporaryDirectory() as workdir:
        responses_path = os.path.join(workdir, "deterministic.json")

        command = [
            "python", "-m", "wisent.core.main", "generate-responses",
            "meta-llama/Llama-3.2-1B-Instruct",
            "--task", "boolq",
            "--num-questions", "2",
            "--max-new-tokens", "30",
            "--temperature", "0.0",
            "--output", responses_path,
            "--device", "cpu",
            "--verbose",
        ]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=180
        )

        # A non-zero exit status means the CLI run failed; show its stderr.
        assert completed.returncode == 0, f"Command failed with: {completed.stderr}"

        # The run must have produced the requested file.
        assert os.path.exists(responses_path), "Output file was not created"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_generate_creative():
    """Run ``generate-responses`` with temperature 1.0 and top-p 0.9.

    The command is executed in a subprocess on CPU. The test expects a zero
    exit code and an output file at the requested path.
    """
    with tempfile.TemporaryDirectory() as workdir:
        responses_path = os.path.join(workdir, "creative.json")

        command = [
            "python", "-m", "wisent.core.main", "generate-responses",
            "meta-llama/Llama-3.2-1B-Instruct",
            "--task", "boolq",
            "--num-questions", "2",
            "--max-new-tokens", "40",
            "--temperature", "1.0",
            "--top-p", "0.9",
            "--output", responses_path,
            "--device", "cpu",
            "--verbose",
        ]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=180
        )

        # A non-zero exit status means the CLI run failed; show its stderr.
        assert completed.returncode == 0, f"Command failed with: {completed.stderr}"

        # The run must have produced the requested file.
        assert os.path.exists(responses_path), "Output file was not created"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
if __name__ == "__main__":
    # Direct execution: hand this file to pytest with verbose output.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)
|
|
File without changes
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test for multi_steer_from_trained_vectors.sh example.
|
|
3
|
+
|
|
4
|
+
This test validates practical use cases of combining pre-trained vectors
|
|
5
|
+
for different personas and scenarios (tech doc writer, teacher, etc).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import subprocess
|
|
9
|
+
import pytest
|
|
10
|
+
import tempfile
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_named_vector(tmpdir, name, trait_label):
    """Create a steering vector file named ``<name>.pt`` inside ``tmpdir``.

    Runs the ``generate-vector-from-task`` CLI command in a subprocess on CPU
    against the boolq task and asserts that it exits with status zero.

    Args:
        tmpdir: Directory in which the vector file is written.
        name: Base file name (without extension) of the vector.
        trait_label: Value passed to the command's ``--trait-label`` option.

    Returns:
        The path that was passed to the command's ``--output`` option.
    """
    target_path = os.path.join(tmpdir, f"{name}.pt")

    command = [
        "python", "-m", "wisent.core.main", "generate-vector-from-task",
        "--task", "boolq",
        "--trait-label", trait_label,
        "--output", target_path,
        "--model", "meta-llama/Llama-3.2-1B-Instruct",
        "--num-pairs", "10",
        "--layers", "3",
        "--token-aggregation", "average",
        "--method", "caa",
        "--normalize",
        "--device", "cpu",
    ]
    completed = subprocess.run(
        command, capture_output=True, text=True, timeout=300
    )

    # Fail the calling test right away if the vector could not be generated.
    assert completed.returncode == 0, f"Failed to create {name} vector: {completed.stderr}"
    return target_path
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_technical_documentation_persona():
    """Combine a "formal" and a "technical" vector at equal weights.

    Two vectors are created with ``create_named_vector`` and passed to the
    ``multi-steer`` CLI command at 0.5 each. The test expects a zero exit
    code and a combined vector file at the ``--save-combined`` path.
    """
    with tempfile.TemporaryDirectory() as workdir:
        formal_vec = create_named_vector(workdir, "formal", "formal_tone")
        technical_vec = create_named_vector(workdir, "technical", "technical")
        merged_path = os.path.join(workdir, "tech_doc.pt")

        command = [
            "python", "-m", "wisent.core.main", "multi-steer",
            "--vector", f"{formal_vec}:0.5",
            "--vector", f"{technical_vec}:0.5",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--method", "CAA",
            "--prompt", "Explain REST APIs.",
            "--max-new-tokens", "50",
            "--normalize-weights",
            "--save-combined", merged_path,
            "--device", "cpu",
            "--verbose",
        ]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=180
        )

        assert completed.returncode == 0, f"Command failed: {completed.stderr}"
        assert os.path.exists(merged_path), "Combined vector not saved"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_friendly_teacher_persona():
    """Combine a "friendly" and a "detailed" vector at weights 0.6 and 0.4.

    Two vectors are created with ``create_named_vector`` and passed to the
    ``multi-steer`` CLI command. The test expects a zero exit code and a
    combined vector file at the ``--save-combined`` path.
    """
    with tempfile.TemporaryDirectory() as workdir:
        friendly_vec = create_named_vector(workdir, "friendly", "friendly")
        detailed_vec = create_named_vector(workdir, "detailed", "detailed")
        merged_path = os.path.join(workdir, "teacher.pt")

        command = [
            "python", "-m", "wisent.core.main", "multi-steer",
            "--vector", f"{friendly_vec}:0.6",
            "--vector", f"{detailed_vec}:0.4",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--method", "CAA",
            "--prompt", "How does photosynthesis work?",
            "--max-new-tokens", "50",
            "--normalize-weights",
            "--save-combined", merged_path,
            "--device", "cpu",
            "--verbose",
        ]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=180
        )

        assert completed.returncode == 0, f"Command failed: {completed.stderr}"
        assert os.path.exists(merged_path), "Combined vector not saved"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_executive_summary_persona():
    """Combine a "concise" and a "formal" vector at weights 0.6 and 0.4.

    Two vectors are created with ``create_named_vector`` and passed to the
    ``multi-steer`` CLI command. The test expects a zero exit code and a
    combined vector file at the ``--save-combined`` path.
    """
    with tempfile.TemporaryDirectory() as workdir:
        concise_vec = create_named_vector(workdir, "concise", "concise")
        formal_vec = create_named_vector(workdir, "formal", "formal")
        merged_path = os.path.join(workdir, "executive.pt")

        command = [
            "python", "-m", "wisent.core.main", "multi-steer",
            "--vector", f"{concise_vec}:0.6",
            "--vector", f"{formal_vec}:0.4",
            "--model", "meta-llama/Llama-3.2-1B-Instruct",
            "--layer", "3",
            "--method", "CAA",
            "--prompt", "Benefits of cloud computing.",
            "--max-new-tokens", "50",
            "--normalize-weights",
            "--save-combined", merged_path,
            "--device", "cpu",
            "--verbose",
        ]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=180
        )

        assert completed.returncode == 0, f"Command failed: {completed.stderr}"
        assert os.path.exists(merged_path), "Combined vector not saved"
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def test_comparing_weight_ratios():
    """Run ``multi-steer`` on one prompt with three different weight splits.

    Two vectors are created with ``create_named_vector``. The command is then
    run with weights 0.7/0.3 (A), 0.3/0.7 (B) and 0.5/0.5 (C), in that order,
    and each run must exit with status zero before the next one starts.
    """
    with tempfile.TemporaryDirectory() as workdir:
        first_vec = create_named_vector(workdir, "v1", "trait1")
        second_vec = create_named_vector(workdir, "v2", "trait2")

        question = "Explain machine learning."

        # (label, weight for the first vector, weight for the second vector)
        configurations = (
            ("A", "0.7", "0.3"),  # more weight on the first vector
            ("B", "0.3", "0.7"),  # more weight on the second vector
            ("C", "0.5", "0.5"),  # balanced
        )

        for label, first_weight, second_weight in configurations:
            command = [
                "python", "-m", "wisent.core.main", "multi-steer",
                "--vector", f"{first_vec}:{first_weight}",
                "--vector", f"{second_vec}:{second_weight}",
                "--model", "meta-llama/Llama-3.2-1B-Instruct",
                "--layer", "3",
                "--method", "CAA",
                "--prompt", question,
                "--max-new-tokens", "50",
                "--normalize-weights",
                "--device", "cpu",
                "--verbose",
            ]
            outcome = subprocess.run(
                command, capture_output=True, text=True, timeout=180
            )

            assert outcome.returncode == 0, f"Config {label} failed: {outcome.stderr}"
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
if __name__ == "__main__":
    # Direct execution: hand this file to pytest with verbose output.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)
|