wisent 0.5.12__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic. Click here for more details.
- wisent/__init__.py +1 -1
- wisent/core/activations/__init__.py +26 -0
- wisent/core/activations/activations.py +96 -0
- wisent/core/activations/activations_collector.py +71 -20
- wisent/core/activations/prompt_construction_strategy.py +47 -0
- wisent/core/agent/budget.py +2 -2
- wisent/core/agent/device_benchmarks.py +1 -1
- wisent/core/agent/diagnose/classifier_marketplace.py +8 -8
- wisent/core/agent/diagnose/response_diagnostics.py +4 -4
- wisent/core/agent/diagnose/synthetic_classifier_option.py +1 -1
- wisent/core/agent/diagnose/tasks/task_manager.py +3 -3
- wisent/core/agent/diagnose.py +2 -1
- wisent/core/autonomous_agent.py +10 -2
- wisent/core/benchmark_extractors.py +293 -0
- wisent/core/bigcode_integration.py +20 -7
- wisent/core/branding.py +108 -0
- wisent/core/cli/__init__.py +15 -0
- wisent/core/cli/create_steering_vector.py +138 -0
- wisent/core/cli/evaluate_responses.py +715 -0
- wisent/core/cli/generate_pairs.py +128 -0
- wisent/core/cli/generate_pairs_from_task.py +119 -0
- wisent/core/cli/generate_responses.py +129 -0
- wisent/core/cli/generate_vector_from_synthetic.py +149 -0
- wisent/core/cli/generate_vector_from_task.py +147 -0
- wisent/core/cli/get_activations.py +191 -0
- wisent/core/cli/optimize_classification.py +339 -0
- wisent/core/cli/optimize_steering.py +364 -0
- wisent/core/cli/tasks.py +182 -0
- wisent/core/cli_logger.py +22 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +27 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +49 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +146 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +129 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livecodebench.py +367 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +116 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad2.py +124 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sst2.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +127 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_gen.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +127 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +1 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +1 -1
- wisent/core/data_loaders/__init__.py +235 -0
- wisent/core/data_loaders/loaders/lm_loader.py +2 -2
- wisent/core/data_loaders/loaders/task_interface_loader.py +300 -0
- wisent/{cli/data_loaders/data_loader_rotator.py → core/data_loaders/rotator.py} +1 -1
- wisent/core/download_full_benchmarks.py +79 -2
- wisent/core/evaluators/benchmark_specific/__init__.py +26 -0
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/evaluator.py +17 -17
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/cpp_sanitizer.py +2 -2
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/java_sanitizer.py +2 -2
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/python_sanitizer.py +2 -2
- wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/__init__.py +3 -0
- wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/provider.py +305 -0
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/runtime.py +36 -4
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/entrypoint.py +2 -4
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/recipes.py +1 -1
- wisent/core/evaluators/benchmark_specific/coding/solution_generator.py +258 -0
- wisent/core/evaluators/benchmark_specific/exact_match_evaluator.py +79 -0
- wisent/core/evaluators/benchmark_specific/f1_evaluator.py +101 -0
- wisent/core/evaluators/benchmark_specific/generation_evaluator.py +197 -0
- wisent/core/{log_likelihoods_evaluator.py → evaluators/benchmark_specific/log_likelihoods_evaluator.py} +10 -2
- wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py +140 -0
- wisent/core/evaluators/benchmark_specific/personalization_evaluator.py +250 -0
- wisent/{cli/evaluators/evaluator_rotator.py → core/evaluators/rotator.py} +4 -4
- wisent/core/lm_eval_harness_ground_truth.py +3 -2
- wisent/core/main.py +57 -0
- wisent/core/model_persistence.py +2 -2
- wisent/core/models/wisent_model.py +6 -6
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
- wisent/core/optuna/steering/steering_optimization.py +1 -1
- wisent/core/parser_arguments/__init__.py +10 -0
- wisent/core/parser_arguments/agent_parser.py +110 -0
- wisent/core/parser_arguments/configure_model_parser.py +7 -0
- wisent/core/parser_arguments/create_steering_vector_parser.py +59 -0
- wisent/core/parser_arguments/evaluate_parser.py +40 -0
- wisent/core/parser_arguments/evaluate_responses_parser.py +10 -0
- wisent/core/parser_arguments/full_optimize_parser.py +115 -0
- wisent/core/parser_arguments/generate_pairs_from_task_parser.py +33 -0
- wisent/core/parser_arguments/generate_pairs_parser.py +29 -0
- wisent/core/parser_arguments/generate_responses_parser.py +15 -0
- wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +127 -0
- wisent/core/parser_arguments/generate_vector_from_task_parser.py +127 -0
- wisent/core/parser_arguments/generate_vector_parser.py +90 -0
- wisent/core/parser_arguments/get_activations_parser.py +90 -0
- wisent/core/parser_arguments/main_parser.py +152 -0
- wisent/core/parser_arguments/model_config_parser.py +59 -0
- wisent/core/parser_arguments/monitor_parser.py +17 -0
- wisent/core/parser_arguments/multi_steer_parser.py +47 -0
- wisent/core/parser_arguments/optimize_classification_parser.py +67 -0
- wisent/core/parser_arguments/optimize_sample_size_parser.py +58 -0
- wisent/core/parser_arguments/optimize_steering_parser.py +147 -0
- wisent/core/parser_arguments/synthetic_parser.py +93 -0
- wisent/core/parser_arguments/tasks_parser.py +584 -0
- wisent/core/parser_arguments/test_nonsense_parser.py +26 -0
- wisent/core/parser_arguments/utils.py +111 -0
- wisent/core/prompts/core/prompt_formater.py +3 -3
- wisent/core/prompts/prompt_stratiegies/direct_completion.py +2 -0
- wisent/core/prompts/prompt_stratiegies/instruction_following.py +2 -0
- wisent/core/prompts/prompt_stratiegies/multiple_choice.py +2 -0
- wisent/core/prompts/prompt_stratiegies/role_playing.py +2 -0
- wisent/{cli/steering_methods/steering_rotator.py → core/steering_methods/rotator.py} +4 -4
- wisent/core/steering_optimizer.py +45 -21
- wisent/{synthetic → core/synthetic}/cleaners/deduper_cleaner.py +3 -3
- wisent/{synthetic → core/synthetic}/cleaners/methods/base_dedupers.py +2 -2
- wisent/{synthetic → core/synthetic}/cleaners/methods/base_refusalers.py +1 -1
- wisent/{synthetic → core/synthetic}/cleaners/pairs_cleaner.py +5 -5
- wisent/{synthetic → core/synthetic}/cleaners/refusaler_cleaner.py +4 -4
- wisent/{synthetic → core/synthetic}/db_instructions/mini_dp.py +1 -1
- wisent/{synthetic → core/synthetic}/generators/diversities/methods/fast_diversity.py +1 -1
- wisent/{synthetic → core/synthetic}/generators/pairs_generator.py +38 -12
- wisent/core/tasks/livecodebench_task.py +4 -103
- wisent/core/timing_calibration.py +1 -1
- {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/METADATA +3 -3
- wisent-0.5.13.dist-info/RECORD +294 -0
- wisent-0.5.13.dist-info/entry_points.txt +2 -0
- wisent/benchmarks/coding/providers/livecodebench/provider.py +0 -53
- wisent/classifiers/core/atoms.py +0 -747
- wisent/classifiers/models/logistic.py +0 -29
- wisent/classifiers/models/mlp.py +0 -47
- wisent/cli/classifiers/classifier_rotator.py +0 -137
- wisent/cli/cli_logger.py +0 -142
- wisent/cli/wisent_cli/commands/help_cmd.py +0 -52
- wisent/cli/wisent_cli/commands/listing.py +0 -154
- wisent/cli/wisent_cli/commands/train_cmd.py +0 -322
- wisent/cli/wisent_cli/main.py +0 -93
- wisent/cli/wisent_cli/shell.py +0 -80
- wisent/cli/wisent_cli/ui.py +0 -69
- wisent/cli/wisent_cli/util/aggregations.py +0 -43
- wisent/cli/wisent_cli/util/parsing.py +0 -126
- wisent/cli/wisent_cli/version.py +0 -4
- wisent/opti/methods/__init__.py +0 -0
- wisent/synthetic/__init__.py +0 -0
- wisent/synthetic/cleaners/__init__.py +0 -0
- wisent/synthetic/cleaners/core/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
- wisent/synthetic/db_instructions/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/__init__.py +0 -0
- wisent/synthetic/generators/__init__.py +0 -0
- wisent/synthetic/generators/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
- wisent-0.5.12.dist-info/RECORD +0 -220
- /wisent/{benchmarks → core/evaluators/benchmark_specific/coding}/__init__.py +0 -0
- /wisent/{benchmarks/coding → core/evaluators/benchmark_specific/coding/metrics}/__init__.py +0 -0
- /wisent/{benchmarks/coding/metrics → core/evaluators/benchmark_specific/coding/metrics/core}/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/core/atoms.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/passk.py +0 -0
- /wisent/{benchmarks/coding/metrics/core → core/evaluators/benchmark_specific/coding/output_sanitizer}/__init__.py +0 -0
- /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/output_sanitizer/core}/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/core/atoms.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/utils.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/__init__.py +0 -0
- /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/providers}/core/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/core/atoms.py +0 -0
- /wisent/{benchmarks/coding/providers/core → core/evaluators/benchmark_specific/coding/safe_docker}/__init__.py +0 -0
- /wisent/{benchmarks/coding/providers/livecodebench → core/evaluators/benchmark_specific/coding/safe_docker/core}/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/atoms.py +0 -0
- /wisent/{benchmarks/coding/safe_docker → core/opti}/__init__.py +0 -0
- /wisent/{benchmarks/coding/safe_docker → core/opti}/core/__init__.py +0 -0
- /wisent/{opti → core/opti}/core/atoms.py +0 -0
- /wisent/{classifiers → core/opti/methods}/__init__.py +0 -0
- /wisent/{opti → core/opti}/methods/opti_classificator.py +0 -0
- /wisent/{opti → core/opti}/methods/opti_steering.py +0 -0
- /wisent/{classifiers/core → core/synthetic}/__init__.py +0 -0
- /wisent/{classifiers/models → core/synthetic/cleaners}/__init__.py +0 -0
- /wisent/{cli → core/synthetic/cleaners/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/cleaners/core/atoms.py +0 -0
- /wisent/{cli/classifiers → core/synthetic/cleaners/methods}/__init__.py +0 -0
- /wisent/{cli/data_loaders → core/synthetic/cleaners/methods/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/cleaners/methods/core/atoms.py +0 -0
- /wisent/{cli/evaluators → core/synthetic/db_instructions}/__init__.py +0 -0
- /wisent/{cli/steering_methods → core/synthetic/db_instructions/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/db_instructions/core/atoms.py +0 -0
- /wisent/{cli/wisent_cli → core/synthetic/generators}/__init__.py +0 -0
- /wisent/{cli/wisent_cli/commands → core/synthetic/generators/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/generators/core/atoms.py +0 -0
- /wisent/{cli/wisent_cli/util → core/synthetic/generators/diversities}/__init__.py +0 -0
- /wisent/{opti → core/synthetic/generators/diversities/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/generators/diversities/core/core.py +0 -0
- /wisent/{opti/core → core/synthetic/generators/diversities/methods}/__init__.py +0 -0
- {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/WHEEL +0 -0
- {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/licenses/LICENSE +0 -0
- {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Parser setup for the 'agent' command."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def setup_agent_parser(parser):
    """Register the arguments of the 'agent' subcommand on ``parser``.

    Args:
        parser: The ``argparse`` parser (or subparser) to populate. It is
            modified in place; nothing is returned.

    The options are registered in this order: general agent settings,
    steering method selection, steering-vector normalization, per-method
    parameters (HPR, DAC, BiPO, KSteering) and quality-control settings.
    """
    parser.add_argument("prompt", type=str, help="Prompt to send to the autonomous agent")
    parser.add_argument("--model", type=str, default="meta-llama/Llama-3.1-8B-Instruct", help="Model to use")
    parser.add_argument("--layer", type=int, help="Layer to use (overrides parameter file)")
    parser.add_argument(
        "--quality-threshold", type=float, default=0.3, help="Quality threshold for classifiers (default: 0.3)"
    )
    parser.add_argument(
        "--time-budget",
        type=float,
        default=10.0,
        help="Time budget in minutes for creating classifiers (default: 10.0)",
    )
    parser.add_argument("--max-attempts", type=int, default=3, help="Maximum improvement attempts (default: 3)")
    parser.add_argument(
        "--max-classifiers", type=int, default=None, help="Maximum classifiers to use (default: no limit)"
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose output")

    # Steering method arguments
    parser.add_argument(
        "--steering-method",
        type=str,
        default="CAA",
        choices=["CAA", "HPR", "DAC", "BiPO", "KSteering"],
        help="Steering method to use (default: CAA)",
    )
    parser.add_argument(
        "--steering-strength", type=float, default=1.0, help="Strength of steering vector application (default: 1.0)"
    )
    parser.add_argument("--steering-mode", action="store_true", help="Enable steering mode")

    # Normalization parameters
    parser.add_argument("--normalize-mode", action="store_true", help="Enable normalization of steering vectors")
    parser.add_argument(
        "--normalization-method",
        type=str,
        default="none",
        choices=["none", "l2_unit", "l2_norm", "max_norm"],
        help="Normalization method for steering vectors (default: none)",
    )
    parser.add_argument("--target-norm", type=float, default=None, help="Target norm for steering vectors")

    # HPR (Householder Pseudo-Rotation) parameters
    parser.add_argument("--hpr-beta", type=float, default=1.0, help="Beta parameter for HPR steering (default: 1.0)")

    # DAC (Dynamic Activation Composition) parameters
    parser.add_argument("--dac-dynamic-control", action="store_true", help="Enable dynamic control for DAC steering")
    parser.add_argument(
        "--dac-entropy-threshold", type=float, default=1.0, help="Entropy threshold for DAC steering (default: 1.0)"
    )

    # BiPO (Bi-directional Preference Optimization) parameters
    parser.add_argument("--bipo-beta", type=float, default=0.1, help="Beta parameter for BiPO steering (default: 0.1)")
    parser.add_argument(
        "--bipo-learning-rate", type=float, default=5e-4, help="Learning rate for BiPO steering (default: 5e-4)"
    )
    parser.add_argument(
        "--bipo-epochs", type=int, default=100, help="Number of epochs for BiPO steering (default: 100)"
    )

    # KSteering parameters
    parser.add_argument(
        "--ksteering-num-labels", type=int, default=6, help="Number of labels for K-steering (default: 6)"
    )
    parser.add_argument(
        "--ksteering-hidden-dim", type=int, default=512, help="Hidden dimension for K-steering (default: 512)"
    )
    parser.add_argument(
        "--ksteering-learning-rate", type=float, default=1e-3, help="Learning rate for K-steering (default: 1e-3)"
    )
    parser.add_argument(
        "--ksteering-classifier-epochs", type=int, default=100, help="Classifier epochs for K-steering (default: 100)"
    )
    # The two label options are kept as raw comma-separated strings here.
    parser.add_argument(
        "--ksteering-target-labels",
        type=str,
        default="0",
        help="Target labels for K-steering (comma-separated, default: '0')",
    )
    parser.add_argument(
        "--ksteering-avoid-labels",
        type=str,
        default="",
        help="Avoid labels for K-steering (comma-separated, default: '')",
    )
    parser.add_argument(
        "--ksteering-alpha", type=float, default=50.0, help="Alpha parameter for K-steering (default: 50.0)"
    )

    # Quality Control System parameters
    # Quality control is on by default. ``--enable-quality-control`` is kept
    # for backward compatibility, but as a ``store_true`` flag whose default
    # is already True it can never switch the feature off. The
    # ``--disable-quality-control`` companion stores False into the same dest.
    parser.add_argument(
        "--enable-quality-control",
        action="store_true",
        default=True,
        help="Enable new quality control system (default: True)",
    )
    parser.add_argument(
        "--disable-quality-control",
        action="store_false",
        dest="enable_quality_control",
        help="Disable the quality control system",
    )
    parser.add_argument(
        "--max-quality-attempts",
        type=int,
        default=5,
        help="Maximum attempts to achieve acceptable quality (default: 5)",
    )
    parser.add_argument(
        "--show-parameter-reasoning", action="store_true", help="Display model's reasoning for parameter choices"
    )
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Parser setup for the 'configure-model' command."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def setup_configure_model_parser(parser):
    """Attach the 'configure-model' arguments to ``parser`` in place.

    Registers the positional ``model`` name and the optional ``--force``
    switch.
    """
    add = parser.add_argument

    add(
        "model",
        type=str,
        help="Model name to configure",
    )
    add(
        "--force",
        action="store_true",
        help="Force reconfiguration even if model already has a config",
    )
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Parser for the create-steering-vector command."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def setup_create_steering_vector_parser(parser: argparse.ArgumentParser) -> None:
    """
    Populate ``parser`` with the create-steering-vector arguments.

    The command reads contrastive pairs that already carry activations from a
    JSON file and writes a steering vector built with the chosen method
    (currently only ``caa`` is accepted).
    """
    add = parser.add_argument

    # Where to read from and where to write to.
    add("enriched_pairs_file", type=str, help="Path to JSON file containing contrastive pairs with activations")
    add("--output", type=str, required=True, help="Output file path for steering vector (JSON)")

    # Which steering method builds the vector.
    add("--method", type=str, choices=["caa"], default="caa", help="Steering method to use (default: caa)")

    # Normalization toggle: both flags write to ``normalize``, which defaults to True.
    add("--normalize", action="store_true", default=True, help="L2-normalize steering vectors (default: True)")
    add("--no-normalize", action="store_false", dest="normalize", help="Do not L2-normalize steering vectors")

    # Plain on/off display switches.
    for flag, description in (
        ("--verbose", "Enable verbose output"),
        ("--timing", "Show timing information"),
    ):
        add(flag, action="store_true", help=description)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Parser setup for the 'evaluate' command."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def setup_evaluate_parser(parser):
    """Attach the single-prompt 'evaluate' arguments to ``parser`` in place."""
    add = parser.add_argument

    # Mandatory string options, all registered the same way.
    required_options = (
        ("--vector", "Path to steering vector file (.pt)"),
        ("--prompt", "Prompt to evaluate"),
        ("--model", "Model name or path (used for both generation and evaluation)"),
        ("--trait", "Trait name (e.g., 'catholic', 'cynical')"),
    )
    for flag, description in required_options:
        add(flag, type=str, required=True, help=description)

    # Model placement.
    add(
        "--device",
        type=str,
        default=None,
        help="Device to run on (default: auto-detect)",
    )

    # Steering / generation knobs.
    add(
        "--steering-strength",
        type=float,
        default=2.0,
        help="Steering strength to apply (default: 2.0)",
    )
    add(
        "--max-new-tokens",
        type=int,
        default=100,
        help="Maximum new tokens to generate (default: 100)",
    )
    add(
        "--trait-description",
        type=str,
        default=None,
        help="Optional description of the trait (default: use trait name)",
    )

    # Thresholds, unset unless given.
    threshold_options = (
        ("--trait-threshold", "Minimum trait quality threshold (-1 to 1 scale)"),
        ("--answer-threshold", "Minimum answer quality threshold (0 to 1 scale)"),
    )
    for flag, description in threshold_options:
        add(flag, type=float, default=None, help=description)

    # Output switches.
    add("--verbose", action="store_true", help="Enable verbose output")
    add("--json", action="store_true", help="Output results as JSON")
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Parser setup for the 'evaluate-responses' command."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def setup_evaluate_responses_parser(parser):
    """Attach the 'evaluate-responses' arguments to ``parser`` in place."""
    add = parser.add_argument

    # Required input and output JSON paths.
    add(
        "--input",
        type=str,
        required=True,
        help="Input JSON file with generated responses",
    )
    add(
        "--output",
        type=str,
        required=True,
        help="Output JSON file for evaluation results",
    )

    # Optional string options; they stay None when omitted.
    add(
        "--task",
        type=str,
        help="Task name (optional, overrides task from input JSON)",
    )
    add(
        "--trait",
        type=str,
        help="Personality trait to evaluate (optional, for personalization tasks)",
    )

    add("--verbose", action="store_true", help="Verbose output")
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Parser setup for the 'full-optimize' command."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def setup_full_optimizer_parser(parser):
    """Attach the 'full-optimize' arguments to ``parser`` in place.

    ``--tasks``, ``--skills`` and ``--risks`` are placed in one mutually
    exclusive group; every other option is registered directly on ``parser``.
    """
    add = parser.add_argument

    add("model", type=str, help="Model name or path to optimize")

    # Task selection: at most one of these three may be given.
    selection = parser.add_mutually_exclusive_group()
    for flag, description in (
        ("--tasks", "Specific tasks to optimize"),
        ("--skills", "Select tasks by skill categories (e.g., coding, mathematics, reasoning)"),
        ("--risks", "Select tasks by risk categories (e.g., harmfulness, toxicity, hallucination)"),
    ):
        selection.add_argument(flag, type=str, nargs="+", help=description)

    # General sample limit, followed by the per-stage overrides.
    add(
        "--limit",
        type=int,
        default=100,
        help="Sample limit for all optimizations (default: 100). Can be overridden by specific limits below",
    )
    for flag, description in (
        ("--classification-limit", "Sample limit for classification optimization (overrides --limit)"),
        ("--sample-size-limit", "Sample limit for sample size optimization (overrides --limit)"),
        ("--steering-limit", "Sample limit for steering optimization (overrides --limit)"),
    ):
        add(flag, type=int, default=None, help=description)

    add(
        "--sample-sizes",
        type=int,
        nargs="+",
        default=[5, 10, 20, 50, 100, 200, 500],
        help="Sample sizes to test (default: 5 10 20 50 100 200 500)",
    )

    # Stage-skipping switches; the last one belongs to the steering options.
    for flag, description in (
        ("--skip-classification", "Skip classification optimization and use existing config"),
        ("--skip-sample-size", "Skip sample size optimization"),
        ("--skip-classifier-training", "Skip final classifier training step"),
        ("--skip-control-vectors", "Skip control vector training step"),
        ("--skip-steering", "Skip steering optimization"),
    ):
        add(flag, action="store_true", help=description)

    # Remaining steering optimization options.
    add(
        "--steering-methods",
        type=str,
        nargs="+",
        choices=["CAA", "HPR", "DAC", "BiPO", "KSteering"],
        default=["CAA", "HPR", "DAC", "BiPO", "KSteering"],
        help="Steering methods to test (default: all methods with parameter variations)",
    )
    add(
        "--steering-layer-range",
        type=str,
        default=None,
        help="Layer range for steering optimization (e.g., '0-5')",
    )
    add(
        "--steering-strength-range",
        type=float,
        nargs="+",
        default=[0.5, 1.0, 1.5, 2.0],
        help="Steering strengths to test (default: 0.5 1.0 1.5 2.0)",
    )

    # Task selection options.
    add(
        "--num-tasks",
        type=int,
        default=None,
        help="Number of tasks to randomly select from matched tasks (default: all)",
    )
    add(
        "--min-quality-score",
        type=int,
        default=2,
        choices=[1, 2, 3, 4, 5],
        help="Minimum quality score for tasks (default: 2)",
    )
    add(
        "--task-seed",
        type=int,
        default=None,
        help="Random seed for task selection (for reproducibility)",
    )

    add(
        "--max-time-per-task",
        type=float,
        default=20.0,
        help="Maximum time per task in minutes (default: 20.0)",
    )

    add("--device", type=str, default=None, help="Device to run on")
    add("--verbose", action="store_true", help="Enable verbose output")
    add("--save-plots", action="store_true", help="Save plots for both optimizations")

    # Timing calibration options.
    add(
        "--skip-timing-estimation",
        action="store_true",
        help="Skip timing estimation and proceed without time warnings",
    )
    add(
        "--calibration-file",
        type=str,
        default=None,
        help="File to save/load calibration data",
    )
    add(
        "--calibrate-only",
        action="store_true",
        help="Only run calibration and exit (saves to --calibration-file if provided)",
    )
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Parser setup for the 'generate-pairs-from-task' command."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def setup_generate_pairs_from_task_parser(parser):
    """Register the arguments of the 'generate-pairs-from-task' subcommand.

    Adds, in this order, the positional ``task_name`` followed by the options
    ``--output`` (required), ``--limit``, ``--seed`` and ``--verbose``.

    Args:
        parser: The argparse (sub)parser that receives the arguments.
    """
    register = parser.add_argument

    # Positional argument: the lm-eval task name.
    register("task_name", type=str, help="Name of the lm-eval task (e.g., 'truthfulqa_mc1', 'hellaswag')")

    # Destination file; the only mandatory option.
    register("--output", type=str, required=True, help="Output file path for the generated pairs (JSON format)")

    # Optional knobs: None for --limit means "no cap" per its help text.
    register("--limit", type=int, default=None, help="Maximum number of pairs to generate (default: all available)")
    register("--seed", type=int, default=42, help="Random seed for reproducibility (default: 42)")
    register("--verbose", action="store_true", help="Enable verbose logging")
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Parser setup for the 'generate-pairs' command."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def setup_generate_pairs_parser(parser):
    """Register every option of the 'generate-pairs' subcommand on *parser*.

    ``--trait`` and ``--output`` are required; the remaining options fall back
    to the defaults given in the table below. Options are added in table
    order, which is the order argparse lists them in ``--help``.

    Args:
        parser: The argparse (sub)parser that receives the options.
    """
    # One (flag, add_argument keyword arguments) entry per option.
    option_table = (
        ("--trait", {
            "type": str,
            "required": True,
            "help": "Natural language description of the desired trait or behavior",
        }),
        ("--num-pairs", {
            "type": int,
            "default": 30,
            "help": "Number of contrastive pairs to generate (default: 30)",
        }),
        ("--output", {
            "type": str,
            "required": True,
            "help": "Output file path for the generated pairs (JSON format)",
        }),
        ("--model", {
            "type": str,
            "default": "meta-llama/Llama-3.1-8B-Instruct",
            "help": "Model name or path to use for generation",
        }),
        ("--device", {"type": str, "default": None, "help": "Device to run on"}),
        ("--verbose", {"action": "store_true", "help": "Enable verbose output"}),
        ("--similarity-threshold", {
            "type": float,
            "default": 0.8,
            "help": "Similarity threshold for deduplication (0-1, higher = more strict)",
        }),
        ("--timing", {
            "action": "store_true",
            "help": "Show detailed timing for each generation step",
        }),
        ("--max-workers", {
            "type": int,
            "default": 4,
            "help": "Number of parallel workers for generation (default: 4)",
        }),
    )

    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Parser setup for the 'generate-responses' command."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def setup_generate_responses_parser(parser):
    """Attach the 'generate-responses' command-line interface to *parser*.

    Registers the positional ``model`` followed by the options listed below.
    ``--task`` and ``--output`` are required; every other option is optional.

    Args:
        parser: The argparse (sub)parser that receives the arguments.
    """
    # (name, add_argument keyword arguments), kept in registration order.
    argument_specs = [
        ("model", dict(type=str, help="Model name or path")),
        ("--task", dict(type=str, required=True, help="Task name (e.g., arc_easy, truthfulqa_mc1)")),
        ("--num-questions", dict(type=int, default=10, help="Number of questions to generate responses for (default: 10)")),
        ("--max-new-tokens", dict(type=int, default=128, help="Maximum tokens to generate (default: 128)")),
        ("--temperature", dict(type=float, default=0.7, help="Temperature for generation (default: 0.7)")),
        ("--top-p", dict(type=float, default=0.95, help="Top-p for nucleus sampling (default: 0.95)")),
        ("--device", dict(type=str, default=None, help="Device to use (cpu, cuda, mps)")),
        ("--output", dict(type=str, required=True, help="Output file path for results")),
        ("--use-steering", dict(action="store_true", help="Use steering during generation")),
        ("--verbose", dict(action="store_true", help="Verbose output")),
    ]

    for name, options in argument_specs:
        parser.add_argument(name, **options)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Parser for the generate-vector-from-synthetic command."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def setup_generate_vector_from_synthetic_parser(parser: argparse.ArgumentParser) -> None:
    """
    Register the options of the generate-vector-from-synthetic command.

    The command is a single-shot pipeline that:
    1. generates synthetic contrastive pairs for a trait,
    2. collects activations from those pairs,
    3. creates steering vectors from the activations.

    Args:
        parser: The argparse (sub)parser that receives the options.
    """
    # (flag, add_argument keyword arguments), grouped by pipeline stage and
    # registered in exactly this order.
    option_table = (
        # Trait to generate pairs for
        ("--trait", {
            "type": str,
            "required": True,
            "help": "Trait to generate contrastive pairs for (e.g., 'helpfulness', 'toxicity')",
        }),
        # Output
        ("--output", {
            "type": str,
            "required": True,
            "help": "Output file path for the final steering vector (JSON)",
        }),
        # Model configuration
        ("--model", {
            "type": str,
            "default": "meta-llama/Llama-3.2-1B-Instruct",
            "help": "HuggingFace model name or path (default: meta-llama/Llama-3.2-1B-Instruct)",
        }),
        ("--device", {
            "type": str,
            "default": "cpu",
            "help": "Device to use (e.g., 'cpu', 'cuda', 'cuda:0')",
        }),
        # Pair generation
        ("--num-pairs", {
            "type": int,
            "default": 20,
            "help": "Number of contrastive pairs to generate (default: 20)",
        }),
        ("--similarity-threshold", {
            "type": float,
            "default": 0.8,
            "help": "Cosine similarity threshold for filtering pairs (default: 0.8)",
        }),
        # Activation collection
        ("--layers", {
            "type": str,
            "default": None,
            "help": "Comma-separated layer indices (e.g., '8,12,16') or 'all' (default: all layers)",
        }),
        ("--token-aggregation", {
            "type": str,
            "choices": ["average", "final", "first", "max", "continuation"],
            "default": "average",
            "help": "How to aggregate token activations (default: average)",
        }),
        ("--prompt-strategy", {
            "type": str,
            "choices": ["chat_template", "direct_completion", "instruction_following", "multiple_choice", "role_playing"],
            "default": "chat_template",
            "help": "Prompt construction strategy (default: chat_template)",
        }),
        # Steering vector creation
        ("--method", {
            "type": str,
            "choices": ["caa"],
            "default": "caa",
            "help": "Steering method to use (default: caa)",
        }),
        # The default is already True, so passing --normalize changes nothing;
        # only --no-normalize (same dest) flips the value to False.
        ("--normalize", {
            "action": "store_true",
            "default": True,
            "help": "L2-normalize steering vectors (default: True)",
        }),
        ("--no-normalize", {
            "action": "store_false",
            "dest": "normalize",
            "help": "Do not L2-normalize steering vectors",
        }),
        # Intermediate file handling
        ("--keep-intermediate", {
            "action": "store_true",
            "help": "Keep intermediate files (pairs and enriched pairs)",
        }),
        ("--intermediate-dir", {
            "type": str,
            "default": None,
            "help": "Directory for intermediate files (default: same as output)",
        }),
        # Display options
        ("--verbose", {"action": "store_true", "help": "Enable verbose output"}),
        ("--timing", {"action": "store_true", "help": "Show timing information"}),
    )

    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Parser for the generate-vector-from-task command."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def setup_generate_vector_from_task_parser(parser: argparse.ArgumentParser) -> None:
    """
    Register the options of the generate-vector-from-task command.

    The command is a single-shot pipeline that:
    1. generates contrastive pairs from an lm-eval task,
    2. collects activations from those pairs,
    3. creates steering vectors from the activations.

    Args:
        parser: The argparse (sub)parser that receives the options.
    """
    opt = parser.add_argument

    # --- Task source ------------------------------------------------------
    opt("--task", type=str, required=True,
        help="Name of the lm-eval task to use (e.g., 'mmlu', 'hellaswag')")
    opt("--trait-label", type=str, required=True,
        help="Label for the trait being steered (e.g., 'accuracy', 'correctness')")

    # --- Output -----------------------------------------------------------
    opt("--output", type=str, required=True,
        help="Output file path for the final steering vector (JSON)")

    # --- Model configuration ----------------------------------------------
    opt("--model", type=str, default="meta-llama/Llama-3.2-1B-Instruct",
        help="HuggingFace model name or path (default: meta-llama/Llama-3.2-1B-Instruct)")
    opt("--device", type=str, default="cpu",
        help="Device to use (e.g., 'cpu', 'cuda', 'cuda:0')")

    # --- Pair generation --------------------------------------------------
    opt("--num-pairs", type=int, default=50,
        help="Number of contrastive pairs to generate (default: 50)")

    # --- Activation collection --------------------------------------------
    opt("--layers", type=str, default=None,
        help="Comma-separated layer indices (e.g., '8,12,16') or 'all' (default: all layers)")
    opt("--token-aggregation", type=str, default="average",
        choices=["average", "final", "first", "max", "continuation"],
        help="How to aggregate token activations (default: average)")
    opt("--prompt-strategy", type=str, default="chat_template",
        choices=["chat_template", "direct_completion", "instruction_following", "multiple_choice", "role_playing"],
        help="Prompt construction strategy (default: chat_template)")

    # --- Steering vector creation -----------------------------------------
    opt("--method", type=str, default="caa", choices=["caa"],
        help="Steering method to use (default: caa)")
    # The default is already True, so passing --normalize changes nothing;
    # only --no-normalize (same dest) flips the value to False.
    opt("--normalize", action="store_true", default=True,
        help="L2-normalize steering vectors (default: True)")
    opt("--no-normalize", action="store_false", dest="normalize",
        help="Do not L2-normalize steering vectors")

    # --- Intermediate file handling ---------------------------------------
    opt("--keep-intermediate", action="store_true",
        help="Keep intermediate files (pairs and enriched pairs)")
    opt("--intermediate-dir", type=str, default=None,
        help="Directory for intermediate files (default: same as output)")

    # --- Display options --------------------------------------------------
    opt("--verbose", action="store_true", help="Enable verbose output")
    opt("--timing", action="store_true", help="Show timing information")
|