wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wisent/__init__.py +1 -1
- wisent/core/activations/__init__.py +22 -6
- wisent/core/activations/activations.py +21 -39
- wisent/core/activations/activations_collector.py +141 -373
- wisent/core/activations/classifier_inference_strategy.py +194 -0
- wisent/core/activations/core/atoms.py +8 -92
- wisent/core/activations/extraction_strategy.py +308 -0
- wisent/core/agent/diagnose/response_diagnostics.py +3 -3
- wisent/core/agent/diagnose.py +3 -3
- wisent/core/autonomous_agent.py +2 -2
- wisent/core/cli/agent/apply_steering.py +23 -27
- wisent/core/cli/agent/evaluate_response.py +18 -20
- wisent/core/cli/agent/train_classifier.py +18 -20
- wisent/core/cli/cluster_benchmarks.py +472 -0
- wisent/core/cli/create_steering_vector.py +13 -5
- wisent/core/cli/generate_vector_from_task.py +4 -0
- wisent/core/cli/get_activations.py +12 -36
- wisent/core/cli/method_optimizer.py +859 -0
- wisent/core/cli/optimize.py +44 -5
- wisent/core/cli/optimize_classification.py +5 -6
- wisent/core/cli/optimize_sample_size.py +8 -22
- wisent/core/cli/optimize_steering.py +429 -153
- wisent/core/cli/optimize_weights.py +65 -6
- wisent/core/cli/steering_method_trainer.py +5 -4
- wisent/core/cli/steering_search_space.py +20 -15
- wisent/core/cli/tasks.py +14 -43
- wisent/core/cli/train_unified_goodness.py +17 -18
- wisent/core/contrastive_pairs/diagnostics/control_vectors.py +1578 -173
- wisent/core/contrastive_pairs/diagnostics/linearity.py +63 -80
- wisent/core/contrastive_pairs/diagnostics/vector_quality.py +6 -5
- wisent/core/contrastive_pairs/huggingface_pairs/hf_extractor_manifest.py +5 -19
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/__init__.py +11 -5
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/apps.py +146 -32
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue.py +2 -2
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/humaneval.py +98 -57
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/code_x_glue.py +8 -8
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/freebase.py +1 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -5
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/agieval_aqua_rat.py +129 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/code_x_glue.py +11 -6
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +1 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mbpp.py +47 -6
- wisent/core/evaluators/benchmark_specific/apps_evaluator.py +133 -0
- wisent/core/evaluators/benchmark_specific/coding/metrics/evaluator.py +6 -1
- wisent/core/evaluators/benchmark_specific/conala_evaluator.py +31 -168
- wisent/core/evaluators/custom/examples/humanization_coherent.py +89 -35
- wisent/core/evaluators/oracles/truthfulqa_gen_evaluator.py +2 -20
- wisent/core/evaluators/personalization/coherence.py +46 -0
- wisent/core/hyperparameter_optimizer.py +13 -13
- wisent/core/lm_eval_harness_ground_truth.py +7 -11
- wisent/core/main.py +3 -0
- wisent/core/models/wisent_model.py +8 -7
- wisent/core/opti/methods/opti_weights.py +29 -2
- wisent/core/optuna/classifier/activation_generator.py +14 -12
- wisent/core/optuna/steering/steering_optimization.py +14 -9
- wisent/core/parser_arguments/cluster_benchmarks_parser.py +31 -0
- wisent/core/parser_arguments/generate_vector_from_task_parser.py +20 -0
- wisent/core/parser_arguments/main_parser.py +8 -0
- wisent/core/parser_arguments/optimize_steering_parser.py +117 -10
- wisent/core/parser_arguments/optimize_weights_parser.py +6 -0
- wisent/core/parser_arguments/tasks_parser.py +7 -19
- wisent/core/steering_methods/core/atoms.py +1 -2
- wisent/core/steering_methods/methods/caa.py +1 -1
- wisent/core/steering_methods/methods/hyperplane.py +74 -0
- wisent/core/steering_methods/methods/prism.py +1 -2
- wisent/core/steering_methods/methods/pulse.py +39 -8
- wisent/core/steering_methods/methods/titan.py +59 -14
- wisent/core/steering_methods/registry.py +52 -12
- wisent/core/steering_optimizer.py +15 -15
- wisent/core/trainers/steering_trainer.py +9 -18
- wisent/parameters/lm_eval/track_progress_not_lm_eval_tasks.json +19 -70
- wisent/scripts/run_quality_metrics_sweep.sh +22 -27
- wisent/tests/test_aggregation_geometry.py +236 -0
- wisent/tests/test_detector_accuracy.py +163 -0
- wisent/tests/test_geometry_exhaustive.py +1202 -0
- wisent/tests/visualize_geometry.py +255 -61
- {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/METADATA +1 -1
- {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/RECORD +82 -714
- wisent/core/activations/prompt_construction_strategy.py +0 -47
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text.py +0 -15
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_go.py +0 -64
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_java.py +0 -65
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_javascript.py +0 -65
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_php.py +0 -65
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_python.py +0 -65
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_ruby.py +0 -65
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/freebase.py +0 -99
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/instruct_humaneval.py +0 -180
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/instructhumaneval.py +0 -129
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mbpp.py +0 -142
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/agieval.py +0 -155
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/code2text.py +0 -161
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/codexglue.py +0 -107
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livemathbench.py +0 -155
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/polymath.py +0 -155
- wisent/examples/scripts/results/benchmark_descriptions.json +0 -1244
- wisent/examples/scripts/results/benchmark_evaluation_methods.json +0 -66
- wisent/examples/scripts/results/benchmark_evaluator_mapping.json +0 -2781
- wisent/examples/scripts/results/benchmark_evaluator_mapping_updated.json +0 -30536
- wisent/examples/scripts/results/benchmark_evaluators_clean.json +0 -469
- wisent/examples/scripts/results/benchmark_methods_summary.json +0 -260
- wisent/examples/scripts/results/benchmark_pair_creation_methods.json +0 -66
- wisent/examples/scripts/results/benchmark_pair_totals.json +0 -269
- wisent/examples/scripts/results/benchmark_tags.json +0 -917
- wisent/examples/scripts/results/benchmark_test_summary_nov4.json +0 -71
- wisent/examples/scripts/results/coding_benchmarks_test_code_status.json +0 -150
- wisent/examples/scripts/results/failing_benchmarks.json +0 -946
- wisent/examples/scripts/results/failing_benchmarks_list.json +0 -41
- wisent/examples/scripts/results/failing_benchmarks_test_results.json +0 -945
- wisent/examples/scripts/results/missing_benchmark_tags.json +0 -341
- wisent/examples/scripts/results/test_20_newsgroups_evaluation.json +0 -30
- wisent/examples/scripts/results/test_20_newsgroups_pairs.json +0 -8
- wisent/examples/scripts/results/test_AraDICE_evaluation.json +0 -51
- wisent/examples/scripts/results/test_AraDICE_pairs.json +0 -14
- wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_evaluation.json +0 -30
- wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_pairs.json +0 -8
- wisent/examples/scripts/results/test_ArabCulture_evaluation.json +0 -51
- wisent/examples/scripts/results/test_ArabCulture_pairs.json +0 -14
- wisent/examples/scripts/results/test_Tag_evaluation.json +0 -30
- wisent/examples/scripts/results/test_Tag_pairs.json +0 -8
- wisent/examples/scripts/results/test_aclue_evaluation.json +0 -51
- wisent/examples/scripts/results/test_aclue_pairs.json +0 -14
- wisent/examples/scripts/results/test_acp_bench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_acp_bench_hard_evaluation.json +0 -51
- wisent/examples/scripts/results/test_acp_bench_hard_pairs.json +0 -14
- wisent/examples/scripts/results/test_acp_bench_pairs.json +0 -14
- wisent/examples/scripts/results/test_advanced_ai_risk_evaluation.json +0 -51
- wisent/examples/scripts/results/test_advanced_ai_risk_pairs.json +0 -14
- wisent/examples/scripts/results/test_aexams_evaluation.json +0 -51
- wisent/examples/scripts/results/test_aexams_pairs.json +0 -14
- wisent/examples/scripts/results/test_afrimgsm_direct_amh_evaluation.json +0 -30
- wisent/examples/scripts/results/test_afrimgsm_direct_amh_pairs.json +0 -8
- wisent/examples/scripts/results/test_afrimmlu_direct_amh_evaluation.json +0 -30
- wisent/examples/scripts/results/test_afrimmlu_direct_amh_pairs.json +0 -8
- wisent/examples/scripts/results/test_afrixnli_en_direct_amh_evaluation.json +0 -30
- wisent/examples/scripts/results/test_afrixnli_en_direct_amh_pairs.json +0 -8
- wisent/examples/scripts/results/test_ag_news_evaluation.json +0 -30
- wisent/examples/scripts/results/test_ag_news_pairs.json +0 -8
- wisent/examples/scripts/results/test_agieval_evaluation.json +0 -51
- wisent/examples/scripts/results/test_agieval_pairs.json +0 -14
- wisent/examples/scripts/results/test_aime2024_evaluation.json +0 -30
- wisent/examples/scripts/results/test_aime2024_pairs.json +0 -8
- wisent/examples/scripts/results/test_aime2025_evaluation.json +0 -30
- wisent/examples/scripts/results/test_aime2025_pairs.json +0 -8
- wisent/examples/scripts/results/test_aime_evaluation.json +0 -30
- wisent/examples/scripts/results/test_aime_pairs.json +0 -8
- wisent/examples/scripts/results/test_anagrams1_evaluation.json +0 -30
- wisent/examples/scripts/results/test_anagrams1_pairs.json +0 -8
- wisent/examples/scripts/results/test_anagrams2_evaluation.json +0 -30
- wisent/examples/scripts/results/test_anagrams2_pairs.json +0 -8
- wisent/examples/scripts/results/test_anli_evaluation.json +0 -30
- wisent/examples/scripts/results/test_anli_pairs.json +0 -8
- wisent/examples/scripts/results/test_apps_evaluation.json +0 -30
- wisent/examples/scripts/results/test_apps_pairs.json +0 -8
- wisent/examples/scripts/results/test_arabic_exams_evaluation.json +0 -30
- wisent/examples/scripts/results/test_arabic_exams_pairs.json +0 -8
- wisent/examples/scripts/results/test_arabic_leaderboard_complete_evaluation.json +0 -51
- wisent/examples/scripts/results/test_arabic_leaderboard_complete_pairs.json +0 -14
- wisent/examples/scripts/results/test_arabic_leaderboard_light_evaluation.json +0 -51
- wisent/examples/scripts/results/test_arabic_leaderboard_light_pairs.json +0 -14
- wisent/examples/scripts/results/test_arabicmmlu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_arabicmmlu_pairs.json +0 -14
- wisent/examples/scripts/results/test_aradice/test_aradice_evaluation.json +0 -51
- wisent/examples/scripts/results/test_aradice/test_aradice_pairs.json +0 -14
- wisent/examples/scripts/results/test_aradice3/test_aradice_evaluation.json +0 -51
- wisent/examples/scripts/results/test_aradice3/test_aradice_pairs.json +0 -14
- wisent/examples/scripts/results/test_arc_ar_evaluation.json +0 -30
- wisent/examples/scripts/results/test_arc_ar_pairs.json +0 -8
- wisent/examples/scripts/results/test_arc_challenge_evaluation.json +0 -30
- wisent/examples/scripts/results/test_arc_challenge_pairs.json +0 -8
- wisent/examples/scripts/results/test_arc_easy_evaluation.json +0 -30
- wisent/examples/scripts/results/test_arc_easy_pairs.json +0 -8
- wisent/examples/scripts/results/test_argument_topic_evaluation.json +0 -30
- wisent/examples/scripts/results/test_argument_topic_pairs.json +0 -8
- wisent/examples/scripts/results/test_arithmetic_evaluation.json +0 -51
- wisent/examples/scripts/results/test_arithmetic_pairs.json +0 -14
- wisent/examples/scripts/results/test_asdiv_evaluation.json +0 -30
- wisent/examples/scripts/results/test_asdiv_pairs.json +0 -8
- wisent/examples/scripts/results/test_assin_entailment_evaluation.json +0 -30
- wisent/examples/scripts/results/test_assin_entailment_pairs.json +0 -8
- wisent/examples/scripts/results/test_atis_evaluation.json +0 -30
- wisent/examples/scripts/results/test_atis_pairs.json +0 -8
- wisent/examples/scripts/results/test_babi_evaluation.json +0 -30
- wisent/examples/scripts/results/test_babi_pairs.json +0 -8
- wisent/examples/scripts/results/test_babilong_evaluation.json +0 -30
- wisent/examples/scripts/results/test_babilong_pairs.json +0 -8
- wisent/examples/scripts/results/test_bangla_mmlu_evaluation.json +0 -30
- wisent/examples/scripts/results/test_bangla_mmlu_pairs.json +0 -8
- wisent/examples/scripts/results/test_banking77_evaluation.json +0 -30
- wisent/examples/scripts/results/test_banking77_pairs.json +0 -8
- wisent/examples/scripts/results/test_basque/test_basque-glue_pairs.json +0 -14
- wisent/examples/scripts/results/test_basque-glue_evaluation.json +0 -51
- wisent/examples/scripts/results/test_basque-glue_pairs.json +0 -14
- wisent/examples/scripts/results/test_basque2/test_basque-glue_evaluation.json +0 -51
- wisent/examples/scripts/results/test_basque2/test_basque-glue_pairs.json +0 -14
- wisent/examples/scripts/results/test_basque_bench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_basque_bench_pairs.json +0 -14
- wisent/examples/scripts/results/test_basque_glue/test_basque-glue_evaluation.json +0 -51
- wisent/examples/scripts/results/test_basque_glue/test_basque-glue_pairs.json +0 -14
- wisent/examples/scripts/results/test_basqueglue_evaluation.json +0 -51
- wisent/examples/scripts/results/test_basqueglue_pairs.json +0 -14
- wisent/examples/scripts/results/test_bbh_evaluation.json +0 -51
- wisent/examples/scripts/results/test_bbh_pairs.json +0 -14
- wisent/examples/scripts/results/test_bbq_evaluation.json +0 -30
- wisent/examples/scripts/results/test_bbq_pairs.json +0 -8
- wisent/examples/scripts/results/test_bec2016eu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_bec2016eu_pairs.json +0 -14
- wisent/examples/scripts/results/test_belebele_evaluation.json +0 -51
- wisent/examples/scripts/results/test_belebele_pairs.json +0 -14
- wisent/examples/scripts/results/test_benchmarks_evaluation.json +0 -51
- wisent/examples/scripts/results/test_benchmarks_pairs.json +0 -14
- wisent/examples/scripts/results/test_bertaqa_evaluation.json +0 -51
- wisent/examples/scripts/results/test_bertaqa_pairs.json +0 -14
- wisent/examples/scripts/results/test_bhtc_v2_evaluation.json +0 -30
- wisent/examples/scripts/results/test_bhtc_v2_pairs.json +0 -8
- wisent/examples/scripts/results/test_bigbench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_bigbench_pairs.json +0 -14
- wisent/examples/scripts/results/test_blimp_evaluation.json +0 -51
- wisent/examples/scripts/results/test_blimp_pairs.json +0 -14
- wisent/examples/scripts/results/test_boolq/test_boolq_evaluation.json +0 -30
- wisent/examples/scripts/results/test_boolq/test_boolq_pairs.json +0 -8
- wisent/examples/scripts/results/test_boolq-seq2seq_evaluation.json +0 -30
- wisent/examples/scripts/results/test_boolq-seq2seq_pairs.json +0 -8
- wisent/examples/scripts/results/test_boolq_evaluation.json +0 -30
- wisent/examples/scripts/results/test_boolq_pairs.json +0 -8
- wisent/examples/scripts/results/test_c4_evaluation.json +0 -30
- wisent/examples/scripts/results/test_c4_pairs.json +0 -8
- wisent/examples/scripts/results/test_cabreu_evaluation.json +0 -30
- wisent/examples/scripts/results/test_cabreu_pairs.json +0 -8
- wisent/examples/scripts/results/test_careqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_careqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_catalan_bench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_catalan_bench_pairs.json +0 -14
- wisent/examples/scripts/results/test_catalanqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_catalanqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_catcola_evaluation.json +0 -30
- wisent/examples/scripts/results/test_catcola_pairs.json +0 -8
- wisent/examples/scripts/results/test_cb_evaluation.json +0 -30
- wisent/examples/scripts/results/test_cb_pairs.json +0 -8
- wisent/examples/scripts/results/test_ceval/test_ceval_evaluation.json +0 -51
- wisent/examples/scripts/results/test_ceval/test_ceval_pairs.json +0 -14
- wisent/examples/scripts/results/test_ceval_accountant/test_ceval-valid_accountant_evaluation.json +0 -30
- wisent/examples/scripts/results/test_ceval_accountant/test_ceval-valid_accountant_pairs.json +0 -8
- wisent/examples/scripts/results/test_ceval_evaluation.json +0 -51
- wisent/examples/scripts/results/test_ceval_pairs.json +0 -14
- wisent/examples/scripts/results/test_ceval_valid/test_ceval_valid_evaluation.json +0 -51
- wisent/examples/scripts/results/test_ceval_valid/test_ceval_valid_pairs.json +0 -14
- wisent/examples/scripts/results/test_chain_of_thought_evaluation.json +0 -51
- wisent/examples/scripts/results/test_chain_of_thought_pairs.json +0 -14
- wisent/examples/scripts/results/test_chartqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_chartqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_claim_stance_topic_evaluation.json +0 -30
- wisent/examples/scripts/results/test_claim_stance_topic_pairs.json +0 -8
- wisent/examples/scripts/results/test_cmmlu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_cmmlu_pairs.json +0 -14
- wisent/examples/scripts/results/test_cnn_dailymail_evaluation.json +0 -30
- wisent/examples/scripts/results/test_cnn_dailymail_pairs.json +0 -8
- wisent/examples/scripts/results/test_cocoteros_es_evaluation.json +0 -30
- wisent/examples/scripts/results/test_cocoteros_es_pairs.json +0 -8
- wisent/examples/scripts/results/test_codexglue_code_to_text_go_evaluation.json +0 -30
- wisent/examples/scripts/results/test_codexglue_code_to_text_go_pairs.json +0 -8
- wisent/examples/scripts/results/test_codexglue_code_to_text_java_evaluation.json +0 -30
- wisent/examples/scripts/results/test_codexglue_code_to_text_java_pairs.json +0 -8
- wisent/examples/scripts/results/test_codexglue_code_to_text_javascript_evaluation.json +0 -30
- wisent/examples/scripts/results/test_codexglue_code_to_text_javascript_pairs.json +0 -8
- wisent/examples/scripts/results/test_codexglue_code_to_text_php_evaluation.json +0 -30
- wisent/examples/scripts/results/test_codexglue_code_to_text_php_pairs.json +0 -8
- wisent/examples/scripts/results/test_codexglue_code_to_text_python_evaluation.json +0 -30
- wisent/examples/scripts/results/test_codexglue_code_to_text_python_pairs.json +0 -8
- wisent/examples/scripts/results/test_codexglue_code_to_text_ruby_evaluation.json +0 -30
- wisent/examples/scripts/results/test_codexglue_code_to_text_ruby_pairs.json +0 -8
- wisent/examples/scripts/results/test_coedit_gec_evaluation.json +0 -30
- wisent/examples/scripts/results/test_coedit_gec_pairs.json +0 -8
- wisent/examples/scripts/results/test_cola_evaluation.json +0 -30
- wisent/examples/scripts/results/test_cola_pairs.json +0 -8
- wisent/examples/scripts/results/test_commonsense_qa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_commonsense_qa_pairs.json +0 -8
- wisent/examples/scripts/results/test_conala_evaluation.json +0 -30
- wisent/examples/scripts/results/test_conala_pairs.json +0 -8
- wisent/examples/scripts/results/test_concode_evaluation.json +0 -30
- wisent/examples/scripts/results/test_concode_pairs.json +0 -8
- wisent/examples/scripts/results/test_copa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_copa_pairs.json +0 -8
- wisent/examples/scripts/results/test_copal_id_evaluation.json +0 -30
- wisent/examples/scripts/results/test_copal_id_pairs.json +0 -8
- wisent/examples/scripts/results/test_coqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_coqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_coqcat_evaluation.json +0 -30
- wisent/examples/scripts/results/test_coqcat_pairs.json +0 -8
- wisent/examples/scripts/results/test_crows_pairs_evaluation.json +0 -51
- wisent/examples/scripts/results/test_crows_pairs_pairs.json +0 -14
- wisent/examples/scripts/results/test_csatqa_evaluation.json +0 -51
- wisent/examples/scripts/results/test_csatqa_pairs.json +0 -14
- wisent/examples/scripts/results/test_cycle_letters_evaluation.json +0 -30
- wisent/examples/scripts/results/test_cycle_letters_pairs.json +0 -8
- wisent/examples/scripts/results/test_darija_bench/test_darija_bench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_darija_bench/test_darija_bench_pairs.json +0 -14
- wisent/examples/scripts/results/test_darija_bench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_darija_bench_pairs.json +0 -14
- wisent/examples/scripts/results/test_darijahellaswag_evaluation.json +0 -30
- wisent/examples/scripts/results/test_darijahellaswag_pairs.json +0 -8
- wisent/examples/scripts/results/test_darijammlu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_darijammlu_pairs.json +0 -14
- wisent/examples/scripts/results/test_dbpedia_14_evaluation.json +0 -30
- wisent/examples/scripts/results/test_dbpedia_14_pairs.json +0 -8
- wisent/examples/scripts/results/test_drop_evaluation.json +0 -30
- wisent/examples/scripts/results/test_drop_pairs.json +0 -8
- wisent/examples/scripts/results/test_ds1000_evaluation.json +0 -30
- wisent/examples/scripts/results/test_ds1000_pairs.json +0 -8
- wisent/examples/scripts/results/test_egyhellaswag_evaluation.json +0 -30
- wisent/examples/scripts/results/test_egyhellaswag_pairs.json +0 -8
- wisent/examples/scripts/results/test_egymmlu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_egymmlu_pairs.json +0 -14
- wisent/examples/scripts/results/test_epec_koref_bin_evaluation.json +0 -30
- wisent/examples/scripts/results/test_epec_koref_bin_pairs.json +0 -8
- wisent/examples/scripts/results/test_eq_bench_evaluation.json +0 -30
- wisent/examples/scripts/results/test_eq_bench_pairs.json +0 -8
- wisent/examples/scripts/results/test_escola_evaluation.json +0 -30
- wisent/examples/scripts/results/test_escola_pairs.json +0 -8
- wisent/examples/scripts/results/test_ethics_cm_evaluation.json +0 -30
- wisent/examples/scripts/results/test_ethics_cm_pairs.json +0 -8
- wisent/examples/scripts/results/test_ethos_binary_evaluation.json +0 -30
- wisent/examples/scripts/results/test_ethos_binary_pairs.json +0 -8
- wisent/examples/scripts/results/test_eus_exams/test_eus_exams_evaluation.json +0 -51
- wisent/examples/scripts/results/test_eus_exams/test_eus_exams_pairs.json +0 -14
- wisent/examples/scripts/results/test_eus_exams_es_evaluation.json +0 -51
- wisent/examples/scripts/results/test_eus_exams_es_pairs.json +0 -14
- wisent/examples/scripts/results/test_eus_exams_evaluation.json +0 -51
- wisent/examples/scripts/results/test_eus_exams_pairs.json +0 -14
- wisent/examples/scripts/results/test_eus_proficiency_evaluation.json +0 -30
- wisent/examples/scripts/results/test_eus_proficiency_pairs.json +0 -8
- wisent/examples/scripts/results/test_eus_reading_evaluation.json +0 -30
- wisent/examples/scripts/results/test_eus_reading_pairs.json +0 -8
- wisent/examples/scripts/results/test_eus_trivia_evaluation.json +0 -30
- wisent/examples/scripts/results/test_eus_trivia_pairs.json +0 -8
- wisent/examples/scripts/results/test_evalita-mp_evaluation.json +0 -51
- wisent/examples/scripts/results/test_evalita-mp_pairs.json +0 -14
- wisent/examples/scripts/results/test_evalita-sp_sum_task_fp-small_p1_evaluation.json +0 -30
- wisent/examples/scripts/results/test_evalita-sp_sum_task_fp-small_p1_pairs.json +0 -8
- wisent/examples/scripts/results/test_evalita_LLM_evaluation.json +0 -51
- wisent/examples/scripts/results/test_evalita_LLM_pairs.json +0 -14
- wisent/examples/scripts/results/test_evalita_llm/test_evalita_llm_evaluation.json +0 -51
- wisent/examples/scripts/results/test_evalita_llm/test_evalita_llm_pairs.json +0 -14
- wisent/examples/scripts/results/test_evalita_mp/test_evalita-mp_te_prompt-1_evaluation.json +0 -30
- wisent/examples/scripts/results/test_evalita_mp/test_evalita-mp_te_prompt-1_pairs.json +0 -8
- wisent/examples/scripts/results/test_evalita_mp2/test_evalita_mp_evaluation.json +0 -51
- wisent/examples/scripts/results/test_evalita_mp2/test_evalita_mp_pairs.json +0 -14
- wisent/examples/scripts/results/test_evalita_sp2/test_evalita-sp_sum_task_fp-small_p1_evaluation.json +0 -30
- wisent/examples/scripts/results/test_evalita_sp2/test_evalita-sp_sum_task_fp-small_p1_pairs.json +0 -8
- wisent/examples/scripts/results/test_fda_evaluation.json +0 -30
- wisent/examples/scripts/results/test_fda_pairs.json +0 -8
- wisent/examples/scripts/results/test_financial_tweets_evaluation.json +0 -30
- wisent/examples/scripts/results/test_financial_tweets_pairs.json +0 -8
- wisent/examples/scripts/results/test_fld/test_fld_evaluation.json +0 -30
- wisent/examples/scripts/results/test_fld/test_fld_pairs.json +0 -8
- wisent/examples/scripts/results/test_fld_evaluation.json +0 -30
- wisent/examples/scripts/results/test_fld_fixed/test_fld_evaluation.json +0 -30
- wisent/examples/scripts/results/test_fld_fixed/test_fld_pairs.json +0 -8
- wisent/examples/scripts/results/test_fld_pairs.json +0 -8
- wisent/examples/scripts/results/test_flores_evaluation.json +0 -51
- wisent/examples/scripts/results/test_flores_pairs.json +0 -14
- wisent/examples/scripts/results/test_freebase_evaluation.json +0 -30
- wisent/examples/scripts/results/test_freebase_pairs.json +0 -8
- wisent/examples/scripts/results/test_french_bench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_french_bench_pairs.json +0 -14
- wisent/examples/scripts/results/test_galcola_evaluation.json +0 -30
- wisent/examples/scripts/results/test_galcola_pairs.json +0 -8
- wisent/examples/scripts/results/test_galician_bench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_galician_bench_pairs.json +0 -14
- wisent/examples/scripts/results/test_glianorex_evaluation.json +0 -30
- wisent/examples/scripts/results/test_glianorex_pairs.json +0 -8
- wisent/examples/scripts/results/test_global_mmlu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_global_mmlu_pairs.json +0 -14
- wisent/examples/scripts/results/test_glue_evaluation.json +0 -51
- wisent/examples/scripts/results/test_glue_pairs.json +0 -14
- wisent/examples/scripts/results/test_gpqa_evaluation.json +0 -51
- wisent/examples/scripts/results/test_gpqa_pairs.json +0 -14
- wisent/examples/scripts/results/test_gpt3_translation_benchmarks_evaluation.json +0 -51
- wisent/examples/scripts/results/test_gpt3_translation_benchmarks_pairs.json +0 -14
- wisent/examples/scripts/results/test_groundcocoa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_groundcocoa_pairs.json +0 -8
- wisent/examples/scripts/results/test_gsm8k_evaluation.json +0 -30
- wisent/examples/scripts/results/test_gsm8k_pairs.json +0 -8
- wisent/examples/scripts/results/test_haerae_evaluation.json +0 -51
- wisent/examples/scripts/results/test_haerae_pairs.json +0 -14
- wisent/examples/scripts/results/test_headqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_headqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_hellaswag_evaluation.json +0 -30
- wisent/examples/scripts/results/test_hellaswag_pairs.json +0 -8
- wisent/examples/scripts/results/test_hendrycks_ethics_evaluation.json +0 -51
- wisent/examples/scripts/results/test_hendrycks_ethics_pairs.json +0 -14
- wisent/examples/scripts/results/test_hendrycks_math_evaluation.json +0 -51
- wisent/examples/scripts/results/test_hendrycks_math_pairs.json +0 -14
- wisent/examples/scripts/results/test_histoires_morales_evaluation.json +0 -30
- wisent/examples/scripts/results/test_histoires_morales_pairs.json +0 -8
- wisent/examples/scripts/results/test_hmmt_evaluation.json +0 -30
- wisent/examples/scripts/results/test_hmmt_feb_2025_evaluation.json +0 -30
- wisent/examples/scripts/results/test_hmmt_feb_2025_pairs.json +0 -8
- wisent/examples/scripts/results/test_hmmt_pairs.json +0 -8
- wisent/examples/scripts/results/test_hrm8k_evaluation.json +0 -51
- wisent/examples/scripts/results/test_hrm8k_pairs.json +0 -14
- wisent/examples/scripts/results/test_humaneval_evaluation.json +0 -30
- wisent/examples/scripts/results/test_humaneval_pairs.json +0 -8
- wisent/examples/scripts/results/test_humaneval_plus_evaluation.json +0 -30
- wisent/examples/scripts/results/test_humaneval_plus_pairs.json +0 -8
- wisent/examples/scripts/results/test_ifeval_evaluation.json +0 -30
- wisent/examples/scripts/results/test_ifeval_pairs.json +0 -8
- wisent/examples/scripts/results/test_instruct_humaneval/test_instruct_humaneval_evaluation.json +0 -30
- wisent/examples/scripts/results/test_instruct_humaneval/test_instruct_humaneval_pairs.json +0 -8
- wisent/examples/scripts/results/test_instruct_humaneval_evaluation.json +0 -30
- wisent/examples/scripts/results/test_instruct_humaneval_pairs.json +0 -8
- wisent/examples/scripts/results/test_inverse_scaling_evaluation.json +0 -51
- wisent/examples/scripts/results/test_inverse_scaling_hindsight_neglect_10shot_evaluation.json +0 -30
- wisent/examples/scripts/results/test_inverse_scaling_hindsight_neglect_10shot_pairs.json +0 -8
- wisent/examples/scripts/results/test_inverse_scaling_mc/test_inverse_scaling_mc_evaluation.json +0 -51
- wisent/examples/scripts/results/test_inverse_scaling_mc/test_inverse_scaling_mc_pairs.json +0 -14
- wisent/examples/scripts/results/test_inverse_scaling_pairs.json +0 -14
- wisent/examples/scripts/results/test_iwslt2017-ar-en_evaluation.json +0 -30
- wisent/examples/scripts/results/test_iwslt2017-ar-en_pairs.json +0 -8
- wisent/examples/scripts/results/test_iwslt2017-en-ar_evaluation.json +0 -30
- wisent/examples/scripts/results/test_iwslt2017-en-ar_pairs.json +0 -8
- wisent/examples/scripts/results/test_iwslt2017_ar_en/test_iwslt2017-ar-en_evaluation.json +0 -30
- wisent/examples/scripts/results/test_iwslt2017_ar_en/test_iwslt2017-ar-en_pairs.json +0 -8
- wisent/examples/scripts/results/test_iwslt2017_en_ar/test_iwslt2017-en-ar_evaluation.json +0 -30
- wisent/examples/scripts/results/test_iwslt2017_en_ar/test_iwslt2017-en-ar_pairs.json +0 -8
- wisent/examples/scripts/results/test_iwslt2017_group/test_iwslt2017_evaluation.json +0 -30
- wisent/examples/scripts/results/test_iwslt2017_group/test_iwslt2017_pairs.json +0 -8
- wisent/examples/scripts/results/test_japanese_leaderboard_evaluation.json +0 -51
- wisent/examples/scripts/results/test_japanese_leaderboard_pairs.json +0 -14
- wisent/examples/scripts/results/test_jsonschema_bench/test_jsonschema_bench_evaluation.json +0 -30
- wisent/examples/scripts/results/test_jsonschema_bench/test_jsonschema_bench_pairs.json +0 -8
- wisent/examples/scripts/results/test_jsonschema_bench_evaluation.json +0 -30
- wisent/examples/scripts/results/test_jsonschema_bench_final/test_jsonschema_bench_evaluation.json +0 -30
- wisent/examples/scripts/results/test_jsonschema_bench_final/test_jsonschema_bench_pairs.json +0 -8
- wisent/examples/scripts/results/test_jsonschema_bench_pairs.json +0 -8
- wisent/examples/scripts/results/test_kbl_evaluation.json +0 -51
- wisent/examples/scripts/results/test_kbl_fixed/test_kbl_evaluation.json +0 -51
- wisent/examples/scripts/results/test_kbl_fixed/test_kbl_pairs.json +0 -14
- wisent/examples/scripts/results/test_kbl_pairs.json +0 -14
- wisent/examples/scripts/results/test_kmmlu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_kmmlu_pairs.json +0 -14
- wisent/examples/scripts/results/test_kobest_evaluation.json +0 -51
- wisent/examples/scripts/results/test_kobest_pairs.json +0 -14
- wisent/examples/scripts/results/test_kormedmcqa/test_kormedmcqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_kormedmcqa/test_kormedmcqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_kormedmcqa_dentist/test_kormedmcqa_dentist_evaluation.json +0 -30
- wisent/examples/scripts/results/test_kormedmcqa_dentist/test_kormedmcqa_dentist_pairs.json +0 -8
- wisent/examples/scripts/results/test_kormedmcqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_kormedmcqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_lambada_cloze_evaluation.json +0 -30
- wisent/examples/scripts/results/test_lambada_cloze_pairs.json +0 -8
- wisent/examples/scripts/results/test_lambada_evaluation.json +0 -30
- wisent/examples/scripts/results/test_lambada_final/test_lambada_openai_mt_stablelm_en_evaluation.json +0 -30
- wisent/examples/scripts/results/test_lambada_final/test_lambada_openai_mt_stablelm_en_pairs.json +0 -8
- wisent/examples/scripts/results/test_lambada_multilingual/test_lambada_multilingual_evaluation.json +0 -51
- wisent/examples/scripts/results/test_lambada_multilingual/test_lambada_multilingual_pairs.json +0 -14
- wisent/examples/scripts/results/test_lambada_multilingual_evaluation.json +0 -51
- wisent/examples/scripts/results/test_lambada_multilingual_pairs.json +0 -14
- wisent/examples/scripts/results/test_lambada_multilingual_stablelm_evaluation.json +0 -51
- wisent/examples/scripts/results/test_lambada_multilingual_stablelm_pairs.json +0 -14
- wisent/examples/scripts/results/test_lambada_openai_evaluation.json +0 -30
- wisent/examples/scripts/results/test_lambada_openai_pairs.json +0 -8
- wisent/examples/scripts/results/test_lambada_pairs.json +0 -8
- wisent/examples/scripts/results/test_lambada_stablelm_en_fixed/test_lambada_openai_mt_stablelm_en_evaluation.json +0 -30
- wisent/examples/scripts/results/test_lambada_stablelm_en_fixed/test_lambada_openai_mt_stablelm_en_pairs.json +0 -8
- wisent/examples/scripts/results/test_lambada_stablelm_fixed/test_lambada_openai_mt_stablelm_en_evaluation.json +0 -30
- wisent/examples/scripts/results/test_lambada_stablelm_fixed/test_lambada_openai_mt_stablelm_en_pairs.json +0 -8
- wisent/examples/scripts/results/test_lambada_standard_evaluation.json +0 -30
- wisent/examples/scripts/results/test_lambada_standard_pairs.json +0 -8
- wisent/examples/scripts/results/test_leaderboard_evaluation.json +0 -51
- wisent/examples/scripts/results/test_leaderboard_pairs.json +0 -14
- wisent/examples/scripts/results/test_libra/test_libra_evaluation.json +0 -51
- wisent/examples/scripts/results/test_libra/test_libra_pairs.json +0 -14
- wisent/examples/scripts/results/test_libra_evaluation.json +0 -51
- wisent/examples/scripts/results/test_libra_pairs.json +0 -14
- wisent/examples/scripts/results/test_lingoly_evaluation.json +0 -30
- wisent/examples/scripts/results/test_lingoly_pairs.json +0 -8
- wisent/examples/scripts/results/test_livecodebench_evaluation.json +0 -30
- wisent/examples/scripts/results/test_livecodebench_pairs.json +0 -8
- wisent/examples/scripts/results/test_livemathbench_cnmo_en_evaluation.json +0 -30
- wisent/examples/scripts/results/test_livemathbench_cnmo_en_pairs.json +0 -8
- wisent/examples/scripts/results/test_livemathbench_cnmo_zh_evaluation.json +0 -30
- wisent/examples/scripts/results/test_livemathbench_cnmo_zh_pairs.json +0 -8
- wisent/examples/scripts/results/test_llama_evaluation.json +0 -30
- wisent/examples/scripts/results/test_llama_pairs.json +0 -8
- wisent/examples/scripts/results/test_logiqa2_evaluation.json +0 -30
- wisent/examples/scripts/results/test_logiqa2_pairs.json +0 -8
- wisent/examples/scripts/results/test_logiqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_logiqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_m_mmlu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_m_mmlu_pairs.json +0 -14
- wisent/examples/scripts/results/test_mastermind/test_mastermind_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mastermind/test_mastermind_pairs.json +0 -14
- wisent/examples/scripts/results/test_mastermind_24_easy/test_mastermind_24_easy_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mastermind_24_easy/test_mastermind_24_easy_pairs.json +0 -8
- wisent/examples/scripts/results/test_mastermind_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mastermind_pairs.json +0 -14
- wisent/examples/scripts/results/test_math500_evaluation.json +0 -30
- wisent/examples/scripts/results/test_math500_pairs.json +0 -8
- wisent/examples/scripts/results/test_math_evaluation.json +0 -30
- wisent/examples/scripts/results/test_math_pairs.json +0 -8
- wisent/examples/scripts/results/test_mathqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mathqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_mbpp_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mbpp_pairs.json +0 -8
- wisent/examples/scripts/results/test_mbpp_plus_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mbpp_plus_pairs.json +0 -8
- wisent/examples/scripts/results/test_mc_taco_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mc_taco_pairs.json +0 -8
- wisent/examples/scripts/results/test_med_concepts_qa/test_med_concepts_qa_evaluation.json +0 -51
- wisent/examples/scripts/results/test_med_concepts_qa/test_med_concepts_qa_pairs.json +0 -14
- wisent/examples/scripts/results/test_med_concepts_qa_atc_easy/test_med_concepts_qa_atc_easy_evaluation.json +0 -30
- wisent/examples/scripts/results/test_med_concepts_qa_atc_easy/test_med_concepts_qa_atc_easy_pairs.json +0 -8
- wisent/examples/scripts/results/test_med_concepts_qa_evaluation.json +0 -51
- wisent/examples/scripts/results/test_med_concepts_qa_pairs.json +0 -14
- wisent/examples/scripts/results/test_meddialog_evaluation.json +0 -30
- wisent/examples/scripts/results/test_meddialog_pairs.json +0 -8
- wisent/examples/scripts/results/test_meddialog_raw_perplexity/test_meddialog_raw_perplexity_evaluation.json +0 -30
- wisent/examples/scripts/results/test_meddialog_raw_perplexity/test_meddialog_raw_perplexity_pairs.json +0 -8
- wisent/examples/scripts/results/test_mediqa_qa2019_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mediqa_qa2019_pairs.json +0 -8
- wisent/examples/scripts/results/test_medmcqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_medmcqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_medqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_medqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_medtext_evaluation.json +0 -30
- wisent/examples/scripts/results/test_medtext_pairs.json +0 -8
- wisent/examples/scripts/results/test_mela_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mela_pairs.json +0 -14
- wisent/examples/scripts/results/test_meqsum_evaluation.json +0 -30
- wisent/examples/scripts/results/test_meqsum_pairs.json +0 -8
- wisent/examples/scripts/results/test_mercury_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mercury_pairs.json +0 -8
- wisent/examples/scripts/results/test_metabench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_metabench_pairs.json +0 -14
- wisent/examples/scripts/results/test_mgsm_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mgsm_pairs.json +0 -14
- wisent/examples/scripts/results/test_mimic_repsum_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mimic_repsum_pairs.json +0 -8
- wisent/examples/scripts/results/test_minerva_math_evaluation.json +0 -51
- wisent/examples/scripts/results/test_minerva_math_pairs.json +0 -14
- wisent/examples/scripts/results/test_mlqa_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mlqa_pairs.json +0 -14
- wisent/examples/scripts/results/test_mmlu-pro-plus_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mmlu-pro-plus_pairs.json +0 -14
- wisent/examples/scripts/results/test_mmlu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mmlu_pairs.json +0 -14
- wisent/examples/scripts/results/test_mmlu_pro_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mmlu_pro_pairs.json +0 -14
- wisent/examples/scripts/results/test_mmlu_prox_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mmlu_prox_pairs.json +0 -14
- wisent/examples/scripts/results/test_mmlusr_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mmlusr_pairs.json +0 -8
- wisent/examples/scripts/results/test_mmmu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_mmmu_pairs.json +0 -14
- wisent/examples/scripts/results/test_mnli_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mnli_pairs.json +0 -8
- wisent/examples/scripts/results/test_model_written_evals_evaluation.json +0 -51
- wisent/examples/scripts/results/test_model_written_evals_pairs.json +0 -14
- wisent/examples/scripts/results/test_moral_stories_evaluation.json +0 -30
- wisent/examples/scripts/results/test_moral_stories_pairs.json +0 -8
- wisent/examples/scripts/results/test_mts_dialog_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mts_dialog_pairs.json +0 -8
- wisent/examples/scripts/results/test_multiblimp_evaluation.json +0 -51
- wisent/examples/scripts/results/test_multiblimp_pairs.json +0 -14
- wisent/examples/scripts/results/test_multimedqa_evaluation.json +0 -51
- wisent/examples/scripts/results/test_multimedqa_pairs.json +0 -14
- wisent/examples/scripts/results/test_multipl_e_evaluation.json +0 -30
- wisent/examples/scripts/results/test_multipl_e_pairs.json +0 -8
- wisent/examples/scripts/results/test_mutual_evaluation.json +0 -30
- wisent/examples/scripts/results/test_mutual_pairs.json +0 -8
- wisent/examples/scripts/results/test_non_greedy_robustness_agieval_aqua_rat_evaluation.json +0 -30
- wisent/examples/scripts/results/test_non_greedy_robustness_agieval_aqua_rat_pairs.json +0 -8
- wisent/examples/scripts/results/test_noreval_evaluation.json +0 -51
- wisent/examples/scripts/results/test_noreval_pairs.json +0 -14
- wisent/examples/scripts/results/test_noticia_evaluation.json +0 -30
- wisent/examples/scripts/results/test_noticia_pairs.json +0 -8
- wisent/examples/scripts/results/test_nq_open_evaluation.json +0 -30
- wisent/examples/scripts/results/test_nq_open_pairs.json +0 -8
- wisent/examples/scripts/results/test_olaph_evaluation.json +0 -30
- wisent/examples/scripts/results/test_olaph_pairs.json +0 -8
- wisent/examples/scripts/results/test_openbookqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_openbookqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_openllm_evaluation.json +0 -51
- wisent/examples/scripts/results/test_openllm_pairs.json +0 -14
- wisent/examples/scripts/results/test_option_order_robustness_agieval_aqua_rat_evaluation.json +0 -30
- wisent/examples/scripts/results/test_option_order_robustness_agieval_aqua_rat_pairs.json +0 -8
- wisent/examples/scripts/results/test_paloma_evaluation.json +0 -51
- wisent/examples/scripts/results/test_paloma_pairs.json +0 -14
- wisent/examples/scripts/results/test_passkey/test_passkey_evaluation.json +0 -30
- wisent/examples/scripts/results/test_passkey/test_passkey_pairs.json +0 -8
- wisent/examples/scripts/results/test_paws-x_evaluation.json +0 -51
- wisent/examples/scripts/results/test_paws-x_pairs.json +0 -14
- wisent/examples/scripts/results/test_paws_en/test_paws_en_evaluation.json +0 -30
- wisent/examples/scripts/results/test_paws_en/test_paws_en_pairs.json +0 -8
- wisent/examples/scripts/results/test_penn_treebank_evaluation.json +0 -30
- wisent/examples/scripts/results/test_penn_treebank_pairs.json +0 -8
- wisent/examples/scripts/results/test_pile_10k/test_pile_10k_evaluation.json +0 -30
- wisent/examples/scripts/results/test_pile_10k/test_pile_10k_pairs.json +0 -8
- wisent/examples/scripts/results/test_piqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_piqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_polemo2_evaluation.json +0 -30
- wisent/examples/scripts/results/test_polemo2_pairs.json +0 -8
- wisent/examples/scripts/results/test_polymath_en_high_evaluation.json +0 -30
- wisent/examples/scripts/results/test_polymath_en_high_pairs.json +0 -8
- wisent/examples/scripts/results/test_polymath_en_medium_evaluation.json +0 -30
- wisent/examples/scripts/results/test_polymath_en_medium_pairs.json +0 -8
- wisent/examples/scripts/results/test_polymath_zh_high_evaluation.json +0 -30
- wisent/examples/scripts/results/test_polymath_zh_high_pairs.json +0 -8
- wisent/examples/scripts/results/test_polymath_zh_medium_evaluation.json +0 -30
- wisent/examples/scripts/results/test_polymath_zh_medium_pairs.json +0 -8
- wisent/examples/scripts/results/test_portuguese_bench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_portuguese_bench_pairs.json +0 -14
- wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat/test_prompt_robustness_agieval_aqua_rat_evaluation.json +0 -30
- wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat/test_prompt_robustness_agieval_aqua_rat_pairs.json +0 -8
- wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat_evaluation.json +0 -30
- wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat_pairs.json +0 -8
- wisent/examples/scripts/results/test_prost_evaluation.json +0 -30
- wisent/examples/scripts/results/test_prost_pairs.json +0 -8
- wisent/examples/scripts/results/test_ptb_evaluation.json +0 -30
- wisent/examples/scripts/results/test_ptb_pairs.json +0 -8
- wisent/examples/scripts/results/test_pubmedqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_pubmedqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_pythia_evaluation.json +0 -51
- wisent/examples/scripts/results/test_pythia_pairs.json +0 -14
- wisent/examples/scripts/results/test_qa4mre_evaluation.json +0 -30
- wisent/examples/scripts/results/test_qa4mre_pairs.json +0 -8
- wisent/examples/scripts/results/test_qasper_evaluation.json +0 -30
- wisent/examples/scripts/results/test_qasper_pairs.json +0 -8
- wisent/examples/scripts/results/test_race_evaluation.json +0 -30
- wisent/examples/scripts/results/test_race_pairs.json +0 -8
- wisent/examples/scripts/results/test_realtoxicityprompts_evaluation.json +0 -30
- wisent/examples/scripts/results/test_realtoxicityprompts_pairs.json +0 -8
- wisent/examples/scripts/results/test_recode_evaluation.json +0 -30
- wisent/examples/scripts/results/test_recode_pairs.json +0 -8
- wisent/examples/scripts/results/test_record_evaluation.json +0 -30
- wisent/examples/scripts/results/test_record_pairs.json +0 -8
- wisent/examples/scripts/results/test_ruler_evaluation.json +0 -51
- wisent/examples/scripts/results/test_ruler_pairs.json +0 -14
- wisent/examples/scripts/results/test_sciq_evaluation.json +0 -30
- wisent/examples/scripts/results/test_sciq_pairs.json +0 -8
- wisent/examples/scripts/results/test_score_evaluation.json +0 -51
- wisent/examples/scripts/results/test_score_pairs.json +0 -14
- wisent/examples/scripts/results/test_self_consistency_evaluation.json +0 -30
- wisent/examples/scripts/results/test_self_consistency_pairs.json +0 -8
- wisent/examples/scripts/results/test_siqa/test_siqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_siqa/test_siqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_siqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_siqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_spanish_bench_evaluation.json +0 -51
- wisent/examples/scripts/results/test_spanish_bench_pairs.json +0 -14
- wisent/examples/scripts/results/test_squad2_evaluation.json +0 -30
- wisent/examples/scripts/results/test_squad2_pairs.json +0 -8
- wisent/examples/scripts/results/test_squadv2_evaluation.json +0 -30
- wisent/examples/scripts/results/test_squadv2_pairs.json +0 -8
- wisent/examples/scripts/results/test_super-glue-lm-eval-v1-seq2seq_evaluation.json +0 -30
- wisent/examples/scripts/results/test_super-glue-lm-eval-v1-seq2seq_pairs.json +0 -8
- wisent/examples/scripts/results/test_super-glue-lm-eval-v1_evaluation.json +0 -51
- wisent/examples/scripts/results/test_super-glue-lm-eval-v1_pairs.json +0 -14
- wisent/examples/scripts/results/test_swag_evaluation.json +0 -30
- wisent/examples/scripts/results/test_swag_pairs.json +0 -8
- wisent/examples/scripts/results/test_tinyBenchmarks_evaluation.json +0 -51
- wisent/examples/scripts/results/test_tinyBenchmarks_pairs.json +0 -14
- wisent/examples/scripts/results/test_tmmluplus_evaluation.json +0 -51
- wisent/examples/scripts/results/test_tmmluplus_pairs.json +0 -14
- wisent/examples/scripts/results/test_translation_evaluation.json +0 -51
- wisent/examples/scripts/results/test_translation_pairs.json +0 -14
- wisent/examples/scripts/results/test_triviaqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_triviaqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_truthfulqa-multi_evaluation.json +0 -51
- wisent/examples/scripts/results/test_truthfulqa-multi_pairs.json +0 -14
- wisent/examples/scripts/results/test_truthfulqa_evaluation.json +0 -30
- wisent/examples/scripts/results/test_truthfulqa_mc1_evaluation.json +0 -30
- wisent/examples/scripts/results/test_truthfulqa_mc1_pairs.json +0 -8
- wisent/examples/scripts/results/test_truthfulqa_mc2_evaluation.json +0 -30
- wisent/examples/scripts/results/test_truthfulqa_mc2_pairs.json +0 -8
- wisent/examples/scripts/results/test_truthfulqa_pairs.json +0 -8
- wisent/examples/scripts/results/test_turkishmmlu_evaluation.json +0 -51
- wisent/examples/scripts/results/test_turkishmmlu_pairs.json +0 -14
- wisent/examples/scripts/results/test_unfair_tos_evaluation.json +0 -30
- wisent/examples/scripts/results/test_unfair_tos_pairs.json +0 -8
- wisent/examples/scripts/results/test_unscramble_evaluation.json +0 -51
- wisent/examples/scripts/results/test_unscramble_pairs.json +0 -14
- wisent/examples/scripts/results/test_webqs_evaluation.json +0 -30
- wisent/examples/scripts/results/test_webqs_pairs.json +0 -8
- wisent/examples/scripts/results/test_wikitext103_evaluation.json +0 -30
- wisent/examples/scripts/results/test_wikitext103_pairs.json +0 -8
- wisent/examples/scripts/results/test_wikitext_evaluation.json +0 -30
- wisent/examples/scripts/results/test_wikitext_pairs.json +0 -8
- wisent/examples/scripts/results/test_winogender_evaluation.json +0 -51
- wisent/examples/scripts/results/test_winogender_pairs.json +0 -14
- wisent/examples/scripts/results/test_winogrande_evaluation.json +0 -30
- wisent/examples/scripts/results/test_winogrande_pairs.json +0 -8
- wisent/examples/scripts/results/test_wmdp_evaluation.json +0 -30
- wisent/examples/scripts/results/test_wmdp_pairs.json +0 -8
- wisent/examples/scripts/results/test_wmt-ro-en-t5-prompt_evaluation.json +0 -30
- wisent/examples/scripts/results/test_wmt-ro-en-t5-prompt_pairs.json +0 -8
- wisent/examples/scripts/results/test_wmt14_en_fr_evaluation.json +0 -30
- wisent/examples/scripts/results/test_wmt14_en_fr_pairs.json +0 -8
- wisent/examples/scripts/results/test_wmt16_en_de_evaluation.json +0 -30
- wisent/examples/scripts/results/test_wmt16_en_de_pairs.json +0 -8
- wisent/examples/scripts/results/test_wmt16_ro_en_evaluation.json +0 -30
- wisent/examples/scripts/results/test_wmt16_ro_en_pairs.json +0 -8
- wisent/examples/scripts/results/test_wsc273_evaluation.json +0 -30
- wisent/examples/scripts/results/test_wsc273_pairs.json +0 -8
- wisent/examples/scripts/results/test_xcopa_evaluation.json +0 -51
- wisent/examples/scripts/results/test_xcopa_pairs.json +0 -14
- wisent/examples/scripts/results/test_xnli_eu_evaluation.json +0 -30
- wisent/examples/scripts/results/test_xnli_eu_pairs.json +0 -8
- wisent/examples/scripts/results/test_xnli_evaluation.json +0 -51
- wisent/examples/scripts/results/test_xnli_pairs.json +0 -14
- wisent/examples/scripts/results/test_xquad_evaluation.json +0 -51
- wisent/examples/scripts/results/test_xquad_pairs.json +0 -14
- wisent/examples/scripts/results/test_xstorycloze_evaluation.json +0 -51
- wisent/examples/scripts/results/test_xstorycloze_pairs.json +0 -14
- wisent/examples/scripts/results/test_xsum_evaluation.json +0 -30
- wisent/examples/scripts/results/test_xsum_pairs.json +0 -8
- wisent/examples/scripts/results/test_xwinograd_evaluation.json +0 -51
- wisent/examples/scripts/results/test_xwinograd_pairs.json +0 -14
- wisent/examples/scripts/results/test_yahoo_answers_topics_evaluation.json +0 -30
- wisent/examples/scripts/results/test_yahoo_answers_topics_pairs.json +0 -8
- {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/WHEEL +0 -0
- {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/entry_points.txt +0 -0
- {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/licenses/LICENSE +0 -0
- {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/top_level.txt +0 -0
|
@@ -155,12 +155,14 @@ def setup_tasks_parser(parser):
|
|
|
155
155
|
parser.add_argument("--device", type=str, default=None, help="Device to run on")
|
|
156
156
|
parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility")
|
|
157
157
|
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
|
158
|
+
# Extraction strategy - unified approach combining prompt format and token selection
|
|
159
|
+
from wisent.core.activations.extraction_strategy import ExtractionStrategy
|
|
158
160
|
parser.add_argument(
|
|
159
|
-
"--
|
|
161
|
+
"--extraction-strategy",
|
|
160
162
|
type=str,
|
|
161
|
-
choices=
|
|
162
|
-
default=
|
|
163
|
-
help="
|
|
163
|
+
choices=ExtractionStrategy.list_all(),
|
|
164
|
+
default=ExtractionStrategy.default().value,
|
|
165
|
+
help=f"Extraction strategy for activations. Options: {', '.join(ExtractionStrategy.list_all())}. Default: {ExtractionStrategy.default().value}",
|
|
164
166
|
)
|
|
165
167
|
parser.add_argument(
|
|
166
168
|
"--ground-truth-method",
|
|
@@ -385,21 +387,7 @@ def setup_tasks_parser(parser):
|
|
|
385
387
|
help="Directory for saving/loading classifiers and vectors (default: ./models)",
|
|
386
388
|
)
|
|
387
389
|
|
|
388
|
-
|
|
389
|
-
parser.add_argument(
|
|
390
|
-
"--prompt-construction-strategy",
|
|
391
|
-
type=str,
|
|
392
|
-
choices=["multiple_choice", "role_playing", "direct_completion", "instruction_following", "chat_template"],
|
|
393
|
-
default="chat_template",
|
|
394
|
-
help="Strategy for constructing prompts from question-answer pairs (default: chat_template)",
|
|
395
|
-
)
|
|
396
|
-
parser.add_argument(
|
|
397
|
-
"--token-targeting-strategy",
|
|
398
|
-
type=str,
|
|
399
|
-
choices=["choice_token", "continuation_token", "last_token", "first_token", "mean_pooling", "max_pooling"],
|
|
400
|
-
default="choice_token",
|
|
401
|
-
help="Strategy for targeting tokens during activation extraction (default: choice_token)",
|
|
402
|
-
)
|
|
390
|
+
|
|
403
391
|
|
|
404
392
|
# Normalization options
|
|
405
393
|
parser.add_argument("--normalize-mode", action="store_true", help="Enable normalization mode (legacy flag)")
|
|
@@ -150,5 +150,4 @@ class PerLayerBaseSteeringMethod(BaseSteeringMethod):
|
|
|
150
150
|
raw[layer] = self.train_for_layer(pos_list, neg_list)
|
|
151
151
|
|
|
152
152
|
dtype = self.kwargs.get("dtype", None)
|
|
153
|
-
|
|
154
|
-
return LayerActivations(raw, activation_aggregation_strategy=agg, dtype=dtype)
|
|
153
|
+
return LayerActivations(raw, dtype=dtype)
|
|
@@ -14,7 +14,7 @@ class CAAMethod(PerLayerBaseSteeringMethod):
|
|
|
14
14
|
"""
|
|
15
15
|
Contrastive Activation Additions (CAA).
|
|
16
16
|
For each layer: v = mean(positives) - mean(negatives),
|
|
17
|
-
optionally L2-normalized (kwargs: normalize=True, dtype
|
|
17
|
+
optionally L2-normalized (kwargs: normalize=True, dtype=...).
|
|
18
18
|
"""
|
|
19
19
|
name = "caa"
|
|
20
20
|
description = "Per-layer mean(pos)-mean(neg) over ContrastivePairSet."
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import List
|
|
4
|
+
import torch
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from wisent.core.steering_methods.core.atoms import PerLayerBaseSteeringMethod
|
|
8
|
+
from wisent.core.errors import InsufficientDataError
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"HyperplaneMethod",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class HyperplaneMethod(PerLayerBaseSteeringMethod):
|
|
16
|
+
"""
|
|
17
|
+
Hyperplane-based steering using classifier decision boundary.
|
|
18
|
+
|
|
19
|
+
Instead of computing mean(pos) - mean(neg) like CAA, this method trains
|
|
20
|
+
a logistic regression classifier to separate positive from negative activations,
|
|
21
|
+
then uses the classifier's weight vector (hyperplane normal) as the steering vector.
|
|
22
|
+
|
|
23
|
+
This works better when the geometry is orthogonal (each contrastive pair has
|
|
24
|
+
a unique direction) rather than linear (all pairs share a common direction).
|
|
25
|
+
In orthogonal geometry, CAA's mean difference cancels out to near-zero,
|
|
26
|
+
while the classifier can still find a separating hyperplane.
|
|
27
|
+
"""
|
|
28
|
+
name = "hyperplane"
|
|
29
|
+
description = "Classifier-based steering using logistic regression decision boundary as steering vector."
|
|
30
|
+
|
|
31
|
+
def train_for_layer(self, pos_list: List[torch.Tensor], neg_list: List[torch.Tensor]) -> torch.Tensor:
|
|
32
|
+
"""
|
|
33
|
+
Train hyperplane steering vector for a single layer using logistic regression.
|
|
34
|
+
|
|
35
|
+
arguments:
|
|
36
|
+
pos_list: List of positive activations (torch.Tensor) for this layer.
|
|
37
|
+
neg_list: List of negative activations (torch.Tensor) for this layer.
|
|
38
|
+
|
|
39
|
+
returns:
|
|
40
|
+
torch.Tensor steering vector for the layer (classifier weights / hyperplane normal).
|
|
41
|
+
"""
|
|
42
|
+
if not pos_list or not neg_list:
|
|
43
|
+
raise InsufficientDataError(reason="Both positive and negative lists must be non-empty.")
|
|
44
|
+
|
|
45
|
+
pos = torch.stack([t.detach().to("cpu").float().reshape(-1) for t in pos_list], dim=0)
|
|
46
|
+
neg = torch.stack([t.detach().to("cpu").float().reshape(-1) for t in neg_list], dim=0)
|
|
47
|
+
|
|
48
|
+
pos_np = pos.numpy()
|
|
49
|
+
neg_np = neg.numpy()
|
|
50
|
+
|
|
51
|
+
X = np.vstack([pos_np, neg_np])
|
|
52
|
+
y = np.array([1] * len(pos_np) + [0] * len(neg_np))
|
|
53
|
+
|
|
54
|
+
# Train logistic regression classifier
|
|
55
|
+
from sklearn.linear_model import LogisticRegression
|
|
56
|
+
|
|
57
|
+
max_iter = int(self.kwargs.get("max_iter", 1000))
|
|
58
|
+
C = float(self.kwargs.get("C", 1.0))
|
|
59
|
+
|
|
60
|
+
clf = LogisticRegression(max_iter=max_iter, C=C, solver="lbfgs")
|
|
61
|
+
clf.fit(X, y)
|
|
62
|
+
|
|
63
|
+
# Use classifier weights as steering vector
|
|
64
|
+
v = torch.tensor(clf.coef_[0], dtype=torch.float32)
|
|
65
|
+
|
|
66
|
+
if bool(self.kwargs.get("normalize", True)):
|
|
67
|
+
v = self._safe_l2_normalize(v)
|
|
68
|
+
|
|
69
|
+
return v
|
|
70
|
+
|
|
71
|
+
def _safe_l2_normalize(self, v: torch.Tensor, eps: float = 1e-12) -> torch.Tensor:
|
|
72
|
+
if v.ndim != 1:
|
|
73
|
+
v = v.reshape(-1)
|
|
74
|
+
return v / (torch.linalg.norm(v) + eps)
|
|
@@ -219,8 +219,7 @@ class PRISMMethod(BaseSteeringMethod):
|
|
|
219
219
|
primary_map: RawActivationMap = multi_result.to_single_direction_map()
|
|
220
220
|
|
|
221
221
|
dtype = self.kwargs.get("dtype", None)
|
|
222
|
-
|
|
223
|
-
return LayerActivations(primary_map, activation_aggregation_strategy=agg, dtype=dtype)
|
|
222
|
+
return LayerActivations(primary_map, dtype=dtype)
|
|
224
223
|
|
|
225
224
|
def train_multi(self, pair_set: ContrastivePairSet) -> MultiDirectionResult:
|
|
226
225
|
"""
|
|
@@ -41,15 +41,30 @@ class PULSEConfig:
|
|
|
41
41
|
"""Configuration for PULSE steering method."""
|
|
42
42
|
|
|
43
43
|
# Layer configuration
|
|
44
|
-
sensor_layer: int =
|
|
45
|
-
"""Layer index where condition gating is computed."""
|
|
44
|
+
sensor_layer: Optional[int] = None
|
|
45
|
+
"""Layer index where condition gating is computed. If None, auto-computed from num_layers."""
|
|
46
46
|
|
|
47
|
-
steering_layers: List[int] =
|
|
48
|
-
"""Layer indices where steering is applied."""
|
|
47
|
+
steering_layers: Optional[List[int]] = None
|
|
48
|
+
"""Layer indices where steering is applied. If None, auto-computed from num_layers."""
|
|
49
|
+
|
|
50
|
+
num_layers: Optional[int] = None
|
|
51
|
+
"""Total layers in the model. Used to auto-compute steering_layers and sensor_layer."""
|
|
49
52
|
|
|
50
53
|
per_layer_scaling: bool = True
|
|
51
54
|
"""Whether to learn/use different scaling per layer."""
|
|
52
55
|
|
|
56
|
+
def resolve_layers(self, num_layers: int) -> None:
|
|
57
|
+
"""Resolve steering_layers and sensor_layer based on model's num_layers."""
|
|
58
|
+
self.num_layers = num_layers
|
|
59
|
+
if self.sensor_layer is None:
|
|
60
|
+
# 75% through the network
|
|
61
|
+
self.sensor_layer = int(num_layers * 0.75)
|
|
62
|
+
if self.steering_layers is None:
|
|
63
|
+
# Middle to late layers (50% to 85% of network)
|
|
64
|
+
start = int(num_layers * 0.5)
|
|
65
|
+
end = int(num_layers * 0.85)
|
|
66
|
+
self.steering_layers = list(range(start, end))
|
|
67
|
+
|
|
53
68
|
# Condition gating
|
|
54
69
|
condition_threshold: float = 0.5
|
|
55
70
|
"""Threshold for condition activation (0-1)."""
|
|
@@ -188,9 +203,12 @@ class PULSEMethod(BaseSteeringMethod):
|
|
|
188
203
|
|
|
189
204
|
def __init__(self, **kwargs: Any) -> None:
|
|
190
205
|
super().__init__(**kwargs)
|
|
206
|
+
# steering_layers and sensor_layer default to None - resolved at training time
|
|
207
|
+
# based on actual num_layers in the model
|
|
191
208
|
self.config = PULSEConfig(
|
|
192
|
-
sensor_layer=kwargs.get("sensor_layer",
|
|
193
|
-
steering_layers=kwargs.get("steering_layers",
|
|
209
|
+
sensor_layer=kwargs.get("sensor_layer", None), # Auto-resolve from num_layers
|
|
210
|
+
steering_layers=kwargs.get("steering_layers", None), # Auto-resolve from num_layers
|
|
211
|
+
num_layers=kwargs.get("num_layers", None),
|
|
194
212
|
per_layer_scaling=kwargs.get("per_layer_scaling", True),
|
|
195
213
|
condition_threshold=kwargs.get("condition_threshold", 0.5),
|
|
196
214
|
gate_temperature=kwargs.get("gate_temperature", 0.1),
|
|
@@ -224,8 +242,7 @@ class PULSEMethod(BaseSteeringMethod):
|
|
|
224
242
|
|
|
225
243
|
# Return behavior vectors as LayerActivations
|
|
226
244
|
dtype = self.kwargs.get("dtype", None)
|
|
227
|
-
|
|
228
|
-
return LayerActivations(result.behavior_vectors, activation_aggregation_strategy=agg, dtype=dtype)
|
|
245
|
+
return LayerActivations(result.behavior_vectors, dtype=dtype)
|
|
229
246
|
|
|
230
247
|
def train_pulse(
|
|
231
248
|
self,
|
|
@@ -246,6 +263,20 @@ class PULSEMethod(BaseSteeringMethod):
|
|
|
246
263
|
if condition_pairs is None:
|
|
247
264
|
condition_pairs = behavior_pairs
|
|
248
265
|
|
|
266
|
+
# Detect num_layers from available data and resolve config
|
|
267
|
+
buckets = self._collect_from_set(behavior_pairs)
|
|
268
|
+
if buckets:
|
|
269
|
+
max_layer_idx = 0
|
|
270
|
+
for layer_name in buckets.keys():
|
|
271
|
+
try:
|
|
272
|
+
layer_idx = int(str(layer_name).split("_")[-1])
|
|
273
|
+
max_layer_idx = max(max_layer_idx, layer_idx)
|
|
274
|
+
except (ValueError, IndexError):
|
|
275
|
+
pass
|
|
276
|
+
detected_num_layers = max_layer_idx + 1
|
|
277
|
+
if self.config.steering_layers is None or self.config.sensor_layer is None:
|
|
278
|
+
self.config.resolve_layers(detected_num_layers)
|
|
279
|
+
|
|
249
280
|
# 1. Train behavior vectors for steering layers
|
|
250
281
|
behavior_vectors = self._train_behavior_vectors(behavior_pairs)
|
|
251
282
|
|
|
@@ -52,18 +52,42 @@ class TITANConfig:
|
|
|
52
52
|
"""Number of directions per layer in the steering manifold."""
|
|
53
53
|
|
|
54
54
|
# Layer configuration
|
|
55
|
-
steering_layers: List[int] =
|
|
56
|
-
"""Layer indices where steering can be applied."""
|
|
55
|
+
steering_layers: Optional[List[int]] = None
|
|
56
|
+
"""Layer indices where steering can be applied. If None, auto-computed from num_layers."""
|
|
57
57
|
|
|
58
|
-
sensor_layer: int =
|
|
59
|
-
"""Primary layer for gating decisions."""
|
|
58
|
+
sensor_layer: Optional[int] = None
|
|
59
|
+
"""Primary layer for gating decisions. If None, auto-computed from num_layers."""
|
|
60
|
+
|
|
61
|
+
num_layers: Optional[int] = None
|
|
62
|
+
"""Total layers in the model. Used to auto-compute steering_layers and sensor_layer."""
|
|
63
|
+
|
|
64
|
+
def resolve_layers(self, num_layers: int) -> None:
|
|
65
|
+
"""Resolve steering_layers and sensor_layer based on model's num_layers."""
|
|
66
|
+
self.num_layers = num_layers
|
|
67
|
+
if self.sensor_layer is None:
|
|
68
|
+
# 75% through the network
|
|
69
|
+
self.sensor_layer = int(num_layers * 0.75)
|
|
70
|
+
if self.steering_layers is None:
|
|
71
|
+
# Middle to late layers (50% to 90% of network)
|
|
72
|
+
start = int(num_layers * 0.5)
|
|
73
|
+
end = int(num_layers * 0.9)
|
|
74
|
+
self.steering_layers = list(range(start, end))
|
|
60
75
|
|
|
61
76
|
# Network architecture
|
|
62
|
-
gate_hidden_dim: int =
|
|
63
|
-
"""Hidden dimension for gating network."""
|
|
77
|
+
gate_hidden_dim: Optional[int] = None
|
|
78
|
+
"""Hidden dimension for gating network. If None, auto-computed as hidden_dim // 16."""
|
|
64
79
|
|
|
65
|
-
intensity_hidden_dim: int =
|
|
66
|
-
"""Hidden dimension for intensity network."""
|
|
80
|
+
intensity_hidden_dim: Optional[int] = None
|
|
81
|
+
"""Hidden dimension for intensity network. If None, auto-computed as hidden_dim // 32."""
|
|
82
|
+
|
|
83
|
+
def resolve_network_dims(self, hidden_dim: int) -> None:
|
|
84
|
+
"""Resolve network dimensions based on model's hidden dimension."""
|
|
85
|
+
if self.gate_hidden_dim is None:
|
|
86
|
+
# Scale with model size, but clamp to reasonable range [32, 512]
|
|
87
|
+
self.gate_hidden_dim = max(32, min(512, hidden_dim // 16))
|
|
88
|
+
if self.intensity_hidden_dim is None:
|
|
89
|
+
# Scale with model size, but clamp to reasonable range [16, 256]
|
|
90
|
+
self.intensity_hidden_dim = max(16, min(256, hidden_dim // 32))
|
|
67
91
|
|
|
68
92
|
# Training
|
|
69
93
|
optimization_steps: int = 200
|
|
@@ -392,12 +416,15 @@ class TITANMethod(BaseSteeringMethod):
|
|
|
392
416
|
|
|
393
417
|
def __init__(self, **kwargs: Any) -> None:
|
|
394
418
|
super().__init__(**kwargs)
|
|
419
|
+
# steering_layers and sensor_layer default to None - resolved at training time
|
|
420
|
+
# based on actual num_layers in the model
|
|
395
421
|
self.config = TITANConfig(
|
|
396
422
|
num_directions=kwargs.get("num_directions", 5),
|
|
397
|
-
steering_layers=kwargs.get("steering_layers",
|
|
398
|
-
sensor_layer=kwargs.get("sensor_layer",
|
|
399
|
-
|
|
400
|
-
|
|
423
|
+
steering_layers=kwargs.get("steering_layers", None), # Auto-resolve from num_layers
|
|
424
|
+
sensor_layer=kwargs.get("sensor_layer", None), # Auto-resolve from num_layers
|
|
425
|
+
num_layers=kwargs.get("num_layers", None),
|
|
426
|
+
gate_hidden_dim=kwargs.get("gate_hidden_dim", None), # Auto-resolve from hidden_dim
|
|
427
|
+
intensity_hidden_dim=kwargs.get("intensity_hidden_dim", None), # Auto-resolve from hidden_dim
|
|
401
428
|
optimization_steps=kwargs.get("optimization_steps", 200),
|
|
402
429
|
learning_rate=kwargs.get("learning_rate", 0.005),
|
|
403
430
|
warmup_steps=kwargs.get("warmup_steps", 20),
|
|
@@ -429,8 +456,7 @@ class TITANMethod(BaseSteeringMethod):
|
|
|
429
456
|
primary_map[layer] = result.get_effective_direction(layer)
|
|
430
457
|
|
|
431
458
|
dtype = self.kwargs.get("dtype", None)
|
|
432
|
-
|
|
433
|
-
return LayerActivations(primary_map, activation_aggregation_strategy=agg, dtype=dtype)
|
|
459
|
+
return LayerActivations(primary_map, dtype=dtype)
|
|
434
460
|
|
|
435
461
|
def train_titan(self, pair_set: ContrastivePairSet) -> TITANResult:
|
|
436
462
|
"""
|
|
@@ -448,6 +474,21 @@ class TITANMethod(BaseSteeringMethod):
|
|
|
448
474
|
if not buckets:
|
|
449
475
|
raise InsufficientDataError(reason="No valid activation pairs found")
|
|
450
476
|
|
|
477
|
+
# Detect num_layers from available data if not set
|
|
478
|
+
# Find max layer index to determine model size
|
|
479
|
+
max_layer_idx = 0
|
|
480
|
+
for layer_name in buckets.keys():
|
|
481
|
+
try:
|
|
482
|
+
layer_idx = int(str(layer_name).split("_")[-1])
|
|
483
|
+
max_layer_idx = max(max_layer_idx, layer_idx)
|
|
484
|
+
except (ValueError, IndexError):
|
|
485
|
+
pass
|
|
486
|
+
|
|
487
|
+
# Resolve steering_layers and sensor_layer based on detected num_layers
|
|
488
|
+
detected_num_layers = max_layer_idx + 1 # layers are 0-indexed
|
|
489
|
+
if self.config.steering_layers is None or self.config.sensor_layer is None:
|
|
490
|
+
self.config.resolve_layers(detected_num_layers)
|
|
491
|
+
|
|
451
492
|
# Filter to steering layers and determine hidden dim
|
|
452
493
|
layer_names = []
|
|
453
494
|
hidden_dim = None
|
|
@@ -472,6 +513,10 @@ class TITANMethod(BaseSteeringMethod):
|
|
|
472
513
|
if not layer_names or hidden_dim is None:
|
|
473
514
|
raise InsufficientDataError(reason="No valid steering layers found")
|
|
474
515
|
|
|
516
|
+
# Resolve network dimensions based on actual hidden_dim
|
|
517
|
+
if self.config.gate_hidden_dim is None or self.config.intensity_hidden_dim is None:
|
|
518
|
+
self.config.resolve_network_dims(hidden_dim)
|
|
519
|
+
|
|
475
520
|
num_layers = len(layer_names)
|
|
476
521
|
|
|
477
522
|
# Geometry analysis and adaptation
|
|
@@ -75,6 +75,7 @@ from wisent.core.steering_methods.core.atoms import BaseSteeringMethod
|
|
|
75
75
|
class SteeringMethodType(Enum):
|
|
76
76
|
"""Enumeration of all supported steering methods."""
|
|
77
77
|
CAA = "caa"
|
|
78
|
+
HYPERPLANE = "hyperplane"
|
|
78
79
|
PRISM = "prism"
|
|
79
80
|
PULSE = "pulse"
|
|
80
81
|
TITAN = "titan"
|
|
@@ -190,6 +191,44 @@ CAA_DEFINITION = SteeringMethodDefinition(
|
|
|
190
191
|
)
|
|
191
192
|
|
|
192
193
|
|
|
194
|
+
HYPERPLANE_DEFINITION = SteeringMethodDefinition(
|
|
195
|
+
name="hyperplane",
|
|
196
|
+
method_type=SteeringMethodType.HYPERPLANE,
|
|
197
|
+
description="Classifier-based steering using logistic regression decision boundary. Works better than CAA when geometry is orthogonal (each pair has unique direction rather than shared direction).",
|
|
198
|
+
method_class_path="wisent.core.steering_methods.methods.hyperplane.HyperplaneMethod",
|
|
199
|
+
parameters=[
|
|
200
|
+
SteeringMethodParameter(
|
|
201
|
+
name="normalize",
|
|
202
|
+
type=bool,
|
|
203
|
+
default=True,
|
|
204
|
+
help="L2-normalize the steering vector",
|
|
205
|
+
action="store_true",
|
|
206
|
+
cli_flag="--hyperplane-normalize",
|
|
207
|
+
),
|
|
208
|
+
SteeringMethodParameter(
|
|
209
|
+
name="max_iter",
|
|
210
|
+
type=int,
|
|
211
|
+
default=1000,
|
|
212
|
+
help="Maximum iterations for logistic regression",
|
|
213
|
+
cli_flag="--hyperplane-max-iter",
|
|
214
|
+
),
|
|
215
|
+
SteeringMethodParameter(
|
|
216
|
+
name="C",
|
|
217
|
+
type=float,
|
|
218
|
+
default=1.0,
|
|
219
|
+
help="Regularization strength (inverse). Smaller values = stronger regularization.",
|
|
220
|
+
cli_flag="--hyperplane-C",
|
|
221
|
+
),
|
|
222
|
+
],
|
|
223
|
+
optimization_config={
|
|
224
|
+
"strength_search_range": (0.1, 5.0),
|
|
225
|
+
"default_strength": 1.0,
|
|
226
|
+
},
|
|
227
|
+
default_strength=1.0,
|
|
228
|
+
strength_range=(0.1, 5.0),
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
|
|
193
232
|
PRISM_DEFINITION = SteeringMethodDefinition(
|
|
194
233
|
name="prism",
|
|
195
234
|
method_type=SteeringMethodType.PRISM,
|
|
@@ -289,15 +328,15 @@ PULSE_DEFINITION = SteeringMethodDefinition(
|
|
|
289
328
|
SteeringMethodParameter(
|
|
290
329
|
name="sensor_layer",
|
|
291
330
|
type=int,
|
|
292
|
-
default=
|
|
293
|
-
help="Layer index where condition gating is computed",
|
|
331
|
+
default=None,
|
|
332
|
+
help="Layer index where condition gating is computed (auto-computed if not set)",
|
|
294
333
|
cli_flag="--pulse-sensor-layer",
|
|
295
334
|
),
|
|
296
335
|
SteeringMethodParameter(
|
|
297
336
|
name="steering_layers",
|
|
298
337
|
type=str,
|
|
299
|
-
default=
|
|
300
|
-
help="Comma-separated layer indices where steering is applied",
|
|
338
|
+
default=None,
|
|
339
|
+
help="Comma-separated layer indices where steering is applied (auto-computed if not set)",
|
|
301
340
|
cli_flag="--pulse-steering-layers",
|
|
302
341
|
),
|
|
303
342
|
SteeringMethodParameter(
|
|
@@ -408,29 +447,29 @@ TITAN_DEFINITION = SteeringMethodDefinition(
|
|
|
408
447
|
SteeringMethodParameter(
|
|
409
448
|
name="steering_layers",
|
|
410
449
|
type=str,
|
|
411
|
-
default=
|
|
412
|
-
help="Comma-separated layer indices for steering",
|
|
450
|
+
default=None,
|
|
451
|
+
help="Comma-separated layer indices for steering (auto-computed if not set)",
|
|
413
452
|
cli_flag="--titan-steering-layers",
|
|
414
453
|
),
|
|
415
454
|
SteeringMethodParameter(
|
|
416
455
|
name="sensor_layer",
|
|
417
456
|
type=int,
|
|
418
|
-
default=
|
|
419
|
-
help="Primary layer for gating decisions",
|
|
457
|
+
default=None,
|
|
458
|
+
help="Primary layer for gating decisions (auto-computed if not set)",
|
|
420
459
|
cli_flag="--titan-sensor-layer",
|
|
421
460
|
),
|
|
422
461
|
SteeringMethodParameter(
|
|
423
462
|
name="gate_hidden_dim",
|
|
424
463
|
type=int,
|
|
425
|
-
default=
|
|
426
|
-
help="Hidden dimension for gating network",
|
|
464
|
+
default=None,
|
|
465
|
+
help="Hidden dimension for gating network (auto-computed as hidden_dim//16 if not set)",
|
|
427
466
|
cli_flag="--titan-gate-hidden-dim",
|
|
428
467
|
),
|
|
429
468
|
SteeringMethodParameter(
|
|
430
469
|
name="intensity_hidden_dim",
|
|
431
470
|
type=int,
|
|
432
|
-
default=
|
|
433
|
-
help="Hidden dimension for intensity network",
|
|
471
|
+
default=None,
|
|
472
|
+
help="Hidden dimension for intensity network (auto-computed as hidden_dim//32 if not set)",
|
|
434
473
|
cli_flag="--titan-intensity-hidden-dim",
|
|
435
474
|
),
|
|
436
475
|
SteeringMethodParameter(
|
|
@@ -518,6 +557,7 @@ class SteeringMethodRegistry:
|
|
|
518
557
|
|
|
519
558
|
_REGISTRY: Dict[str, SteeringMethodDefinition] = {
|
|
520
559
|
"caa": CAA_DEFINITION,
|
|
560
|
+
"hyperplane": HYPERPLANE_DEFINITION,
|
|
521
561
|
"prism": PRISM_DEFINITION,
|
|
522
562
|
"pulse": PULSE_DEFINITION,
|
|
523
563
|
"titan": TITAN_DEFINITION,
|
|
@@ -26,8 +26,8 @@ from enum import Enum, auto
|
|
|
26
26
|
from pathlib import Path
|
|
27
27
|
|
|
28
28
|
from .config_manager import ModelConfigManager
|
|
29
|
-
from .activations.
|
|
30
|
-
|
|
29
|
+
from .activations.extraction_strategy import ExtractionStrategy
|
|
30
|
+
|
|
31
31
|
from wisent.core.errors import (
|
|
32
32
|
MissingParameterError,
|
|
33
33
|
SteeringMethodUnknownError,
|
|
@@ -60,22 +60,22 @@ class SteeringApplicationConfig:
|
|
|
60
60
|
gaussian_width: float = 0.2
|
|
61
61
|
|
|
62
62
|
|
|
63
|
-
def get_default_token_aggregation_strategies() -> List[
|
|
63
|
+
def get_default_token_aggregation_strategies() -> List[ExtractionStrategy]:
|
|
64
64
|
"""Get token aggregation strategies to test."""
|
|
65
65
|
return [
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
66
|
+
ExtractionStrategy.CHAT_LAST,
|
|
67
|
+
ExtractionStrategy.CHAT_MEAN,
|
|
68
|
+
ExtractionStrategy.CHAT_FIRST,
|
|
69
|
+
ExtractionStrategy.CHAT_MAX_NORM,
|
|
70
70
|
]
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
def get_default_prompt_construction_strategies() -> List[
|
|
73
|
+
def get_default_prompt_construction_strategies() -> List[ExtractionStrategy]:
|
|
74
74
|
"""Get prompt construction strategies to test."""
|
|
75
75
|
return [
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
76
|
+
ExtractionStrategy.CHAT_LAST,
|
|
77
|
+
ExtractionStrategy.CHAT_LAST,
|
|
78
|
+
ExtractionStrategy.CHAT_LAST,
|
|
79
79
|
]
|
|
80
80
|
|
|
81
81
|
|
|
@@ -399,8 +399,8 @@ class SteeringOptimizer:
|
|
|
399
399
|
methods_to_test: Optional[List[SteeringMethod]] = None,
|
|
400
400
|
layer_range: Optional[str] = None,
|
|
401
401
|
strength_range: Optional[List[float]] = None,
|
|
402
|
-
token_aggregation_strategies: Optional[List[
|
|
403
|
-
prompt_construction_strategies: Optional[List[
|
|
402
|
+
token_aggregation_strategies: Optional[List[ExtractionStrategy]] = None,
|
|
403
|
+
prompt_construction_strategies: Optional[List[ExtractionStrategy]] = None,
|
|
404
404
|
steering_application_configs: Optional[List[SteeringApplicationConfig]] = None,
|
|
405
405
|
limit: int = 100,
|
|
406
406
|
max_time_minutes: float = 60.0,
|
|
@@ -603,8 +603,8 @@ class SteeringOptimizer:
|
|
|
603
603
|
method: SteeringMethod,
|
|
604
604
|
layer: int,
|
|
605
605
|
strength: float,
|
|
606
|
-
token_aggregation:
|
|
607
|
-
prompt_construction:
|
|
606
|
+
token_aggregation: ExtractionStrategy,
|
|
607
|
+
prompt_construction: ExtractionStrategy,
|
|
608
608
|
steering_application: SteeringApplicationConfig,
|
|
609
609
|
limit: int,
|
|
610
610
|
split_ratio: float
|
|
@@ -10,9 +10,9 @@ import datetime as _dt
|
|
|
10
10
|
|
|
11
11
|
from wisent.core.activations.core.atoms import (
|
|
12
12
|
LayerActivations,
|
|
13
|
-
ActivationAggregationStrategy,
|
|
14
13
|
RawActivationMap,
|
|
15
14
|
)
|
|
15
|
+
from wisent.core.activations.extraction_strategy import ExtractionStrategy
|
|
16
16
|
from wisent.core.models.wisent_model import WisentModel
|
|
17
17
|
|
|
18
18
|
from wisent.core.trainers.core.atoms import (
|
|
@@ -66,8 +66,7 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
|
|
|
66
66
|
self,
|
|
67
67
|
layers_spec: Sequence[str] | str | int | Sequence[int] | None,
|
|
68
68
|
method_kwargs: dict[str, Any] | None = None,
|
|
69
|
-
|
|
70
|
-
return_full_sequence: bool = False,
|
|
69
|
+
strategy: ExtractionStrategy = ExtractionStrategy.CHAT_LAST,
|
|
71
70
|
normalize_layers: bool = False,
|
|
72
71
|
save_dir: str | Path | None = None,
|
|
73
72
|
accept_low_quality_vector: bool = False,
|
|
@@ -87,16 +86,10 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
|
|
|
87
86
|
- range string "10-30" / "10..30"
|
|
88
87
|
- single int "12"
|
|
89
88
|
- None → use all available layers on the model
|
|
90
|
-
method:
|
|
91
|
-
Name of steering method ("caa").
|
|
92
89
|
method_kwargs:
|
|
93
90
|
Dict of hyperparameters for the method (e.g., {"normalize": True, "scale": 1.0}).
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
full sequences. Ignored if 'return_full_sequence=True'.
|
|
97
|
-
return_full_sequence:
|
|
98
|
-
If True, store full [T,H] sequences per layer (method then must know how
|
|
99
|
-
to collapse to vectors). Default False (collect [H] vectors directly).
|
|
91
|
+
strategy:
|
|
92
|
+
ExtractionStrategy to use during collection.
|
|
100
93
|
normalize_layers:
|
|
101
94
|
If True, L2-normalize activations layer-wise during collection.
|
|
102
95
|
save_dir:
|
|
@@ -112,12 +105,11 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
|
|
|
112
105
|
|
|
113
106
|
# 2) Collect activations for each pair
|
|
114
107
|
for i, pair in enumerate(self.pair_set.pairs):
|
|
115
|
-
updated = self.collector.
|
|
108
|
+
updated = self.collector.collect(
|
|
116
109
|
pair,
|
|
110
|
+
strategy=strategy,
|
|
117
111
|
layers=layers,
|
|
118
|
-
|
|
119
|
-
return_full_sequence=return_full_sequence,
|
|
120
|
-
normalize_layers=normalize_layers,
|
|
112
|
+
normalize=normalize_layers,
|
|
121
113
|
)
|
|
122
114
|
self.pair_set.pairs[i] = updated
|
|
123
115
|
|
|
@@ -221,8 +213,7 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
|
|
|
221
213
|
"layers_used": layers or "all",
|
|
222
214
|
"method": self.steering_method.name,
|
|
223
215
|
"method_kwargs": method_kwargs,
|
|
224
|
-
"
|
|
225
|
-
"return_full_sequence": bool(return_full_sequence),
|
|
216
|
+
"extraction_strategy": strategy.value,
|
|
226
217
|
"normalize_layers": bool(normalize_layers),
|
|
227
218
|
"num_pairs": len(self.pair_set.pairs),
|
|
228
219
|
"hidden_size": getattr(self.model, "hidden_size", None),
|
|
@@ -290,7 +281,7 @@ class WisentSteeringTrainer(BaseSteeringTrainer):
|
|
|
290
281
|
|
|
291
282
|
# Vectors
|
|
292
283
|
raw_map: RawActivationMap = result.steered_vectors.to_dict() # still tensors
|
|
293
|
-
cpu_map = {k: (v.detach().to("cpu") if isinstance(v, torch.Tensor) else v) for k, v in raw_map.items()
|
|
284
|
+
cpu_map = {k: (v.detach().to("cpu") if isinstance(v, torch.Tensor) else v) for k, v in raw_map.items()}
|
|
294
285
|
torch.save(cpu_map, out / "steering_vectors.pt")
|
|
295
286
|
|
|
296
287
|
# Summary (json-serializable)
|