wisent 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wisent/__init__.py +1 -1
- wisent/comparison/__init__.py +1 -0
- wisent/comparison/detect_bos_features.py +275 -0
- wisent/comparison/fgaa.py +465 -0
- wisent/comparison/lora.py +669 -0
- wisent/comparison/lora_dpo.py +592 -0
- wisent/comparison/main.py +444 -0
- wisent/comparison/ours.py +76 -0
- wisent/comparison/sae.py +304 -0
- wisent/comparison/utils.py +381 -0
- wisent/core/activations/activation_cache.py +393 -0
- wisent/core/activations/activations.py +3 -3
- wisent/core/activations/activations_collector.py +12 -7
- wisent/core/activations/classifier_inference_strategy.py +12 -11
- wisent/core/activations/extraction_strategy.py +260 -84
- wisent/core/classifiers/classifiers/core/atoms.py +3 -2
- wisent/core/cli/__init__.py +2 -1
- wisent/core/cli/agent/train_classifier.py +16 -3
- wisent/core/cli/check_linearity.py +35 -3
- wisent/core/cli/cluster_benchmarks.py +4 -6
- wisent/core/cli/create_steering_vector.py +6 -4
- wisent/core/cli/diagnose_vectors.py +7 -4
- wisent/core/cli/estimate_unified_goodness_time.py +6 -4
- wisent/core/cli/generate_pairs_from_task.py +9 -56
- wisent/core/cli/generate_vector_from_task.py +11 -20
- wisent/core/cli/geometry_search.py +137 -0
- wisent/core/cli/get_activations.py +2 -2
- wisent/core/cli/method_optimizer.py +4 -3
- wisent/core/cli/modify_weights.py +3 -2
- wisent/core/cli/optimize_sample_size.py +1 -1
- wisent/core/cli/optimize_steering.py +14 -16
- wisent/core/cli/optimize_weights.py +2 -1
- wisent/core/cli/preview_pairs.py +203 -0
- wisent/core/cli/steering_method_trainer.py +3 -3
- wisent/core/cli/tasks.py +19 -76
- wisent/core/cli/train_unified_goodness.py +3 -3
- wisent/core/contrastive_pairs/diagnostics/control_vectors.py +4 -4
- wisent/core/contrastive_pairs/diagnostics/linearity.py +7 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentic_search.py +37 -347
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aider_polyglot.py +113 -136
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codeforces.py +2 -12
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/coding_benchmarks.py +124 -504
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/faithbench.py +40 -63
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flames.py +46 -89
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flores.py +15 -4
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/frames.py +36 -20
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hallucinations_leaderboard.py +3 -45
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livemathbench.py +42 -4
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/longform_writing.py +2 -112
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/math500.py +39 -4
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/medium_priority_benchmarks.py +475 -525
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mercury.py +65 -42
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/olympiadbench.py +2 -12
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/planbench.py +78 -219
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/polymath.py +37 -4
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/recode.py +84 -69
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/refusalbench.py +168 -160
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/simpleqa.py +44 -25
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tau_bench.py +3 -103
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolbench.py +3 -97
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolemu.py +48 -182
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +3 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +19 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aclue.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench_hard.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/advanced.py +2 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aexams.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrimmlu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrixnli.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabculture.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_exams.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_complete.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_light.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabicmmlu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aradice.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +1 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +1 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/babi.py +36 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bbq.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/belebele.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/benchmarks.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bertaqa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhs.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhtc.py +3 -5
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp_nl.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +22 -5
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/c4.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cabbq.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/careqa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalanqa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catcola.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +10 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval_valid.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chain.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chartqa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/claim.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/click.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cmmlu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cnn.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cocoteros.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coedit.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense_qa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copal_id.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +3 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/csatqa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cycle.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darija_bench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijahellaswag.py +2 -6
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijammlu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/dbpedia.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/discrim_eval.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/doc.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/epec.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_ca.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_es.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/esbbq.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ethics.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_exams.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_proficiency.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_reading.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_trivia.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_llm.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/financial.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/flan.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gaokao.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glianorex.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_mmlu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_piqa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpt3.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/groundcocoa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/haerae.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_ethics.py +5 -9
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_math.py +63 -16
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/histoires_morales.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hrm8k.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval_infilling.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/icelandic_winogrande.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse_scaling.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ja.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_mc.py +1 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kobest.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kormedmcqa.py +5 -17
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_cloze.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/law.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/leaderboard.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lingoly.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/llama3.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lm_syneval.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbenchv2.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mastermind.py +2 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/med_concepts_qa.py +2 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meddialog.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medical.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medmcqa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mela.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/metabench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/minerva_math.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlusr.py +3 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multiblimp.py +2 -5
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/non.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_exact.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_gen_exact.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc.py +4 -8
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc_log_likelihoods.py +4 -8
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/nq_open.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_arc_multilingual.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_hellaswag_multilingual.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_mmlu_multilingual.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_truthfulqa_multilingual.py +2 -5
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/olaph.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/option.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafraseja.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafrases.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws_x.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/persona.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/phrases.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pile.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prompt.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper_bool.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnlieu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/random.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/reversed.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ruler.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/score.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls_mc.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/self.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue_rte.py +2 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/siqa.py +4 -7
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/storycloze.py +2 -6
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/summarization.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super_glue.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swde.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sycophancy.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/t0.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/teca.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyarc.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinybenchmarks.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinygsm8k.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyhellaswag.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinymmlu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinytruthfulqa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinywinogrande.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tmmluplus.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +9 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turblimp_core.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu_mc.py +0 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/unscramble.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/vaxx.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +3 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmdp.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc273.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xcopa.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xlsum.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xquad.py +2 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +2 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +2 -2
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/zhoblimp.py +1 -3
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +173 -6
- wisent/core/data_loaders/loaders/lm_loader.py +12 -1
- wisent/core/geometry_runner.py +995 -0
- wisent/core/geometry_search_space.py +237 -0
- wisent/core/hyperparameter_optimizer.py +1 -1
- wisent/core/main.py +3 -0
- wisent/core/models/core/atoms.py +5 -3
- wisent/core/models/wisent_model.py +1 -1
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
- wisent/core/parser_arguments/check_linearity_parser.py +12 -2
- wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +2 -2
- wisent/core/parser_arguments/generate_vector_from_task_parser.py +6 -13
- wisent/core/parser_arguments/geometry_search_parser.py +61 -0
- wisent/core/parser_arguments/get_activations_parser.py +5 -14
- wisent/core/parser_arguments/main_parser.py +8 -0
- wisent/core/parser_arguments/train_unified_goodness_parser.py +2 -2
- wisent/core/steering.py +5 -3
- wisent/core/steering_methods/methods/hyperplane.py +2 -1
- wisent/core/synthetic/generators/nonsense_generator.py +30 -18
- wisent/core/trainers/steering_trainer.py +2 -2
- wisent/core/utils/device.py +27 -27
- wisent/core/utils/layer_combinations.py +70 -0
- wisent/examples/__init__.py +1 -0
- wisent/examples/scripts/__init__.py +1 -0
- wisent/examples/scripts/count_all_benchmarks.py +121 -0
- wisent/examples/scripts/discover_directions.py +469 -0
- wisent/examples/scripts/extract_benchmark_info.py +71 -0
- wisent/examples/scripts/search_all_short_names.py +31 -0
- wisent/examples/scripts/test_all_benchmarks.py +138 -0
- wisent/examples/scripts/test_all_benchmarks_new.py +28 -0
- wisent/examples/scripts/test_contrastive_pairs_all_supported.py +230 -0
- wisent/examples/scripts/test_nonsense_baseline.py +261 -0
- wisent/examples/scripts/test_one_benchmark.py +324 -0
- wisent/examples/scripts/test_one_coding_benchmark.py +293 -0
- wisent/parameters/lm_eval/broken_in_lm_eval.json +179 -2
- wisent/parameters/lm_eval/category_directions.json +137 -0
- wisent/parameters/lm_eval/repair_plan.json +282 -0
- wisent/parameters/lm_eval/weak_contrastive_pairs.json +38 -0
- wisent/parameters/lm_eval/working_benchmarks.json +206 -0
- wisent/parameters/lm_eval/working_benchmarks_categorized.json +236 -0
- wisent/tests/test_detector_accuracy.py +1 -1
- wisent/tests/visualize_geometry.py +1 -1
- {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/METADATA +5 -1
- {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/RECORD +328 -358
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/browsecomp.py +0 -245
- wisent/examples/contrastive_pairs/humanization_human_vs_ai.json +0 -2112
- wisent/examples/scripts/1/test_basqueglue_evaluation.json +0 -51
- wisent/examples/scripts/1/test_basqueglue_pairs.json +0 -14
- wisent/examples/scripts/1/test_bec2016eu_evaluation.json +0 -51
- wisent/examples/scripts/1/test_bec2016eu_pairs.json +0 -14
- wisent/examples/scripts/1/test_belebele_evaluation.json +0 -51
- wisent/examples/scripts/1/test_belebele_pairs.json +0 -14
- wisent/examples/scripts/1/test_benchmarks_evaluation.json +0 -51
- wisent/examples/scripts/1/test_benchmarks_pairs.json +0 -14
- wisent/examples/scripts/1/test_bertaqa_evaluation.json +0 -51
- wisent/examples/scripts/1/test_bertaqa_pairs.json +0 -14
- wisent/examples/scripts/1/test_bhtc_v2_evaluation.json +0 -30
- wisent/examples/scripts/1/test_bhtc_v2_pairs.json +0 -8
- wisent/examples/scripts/1/test_boolq-seq2seq_evaluation.json +0 -30
- wisent/examples/scripts/1/test_boolq-seq2seq_pairs.json +0 -8
- wisent/examples/scripts/1/test_cabreu_evaluation.json +0 -30
- wisent/examples/scripts/1/test_cabreu_pairs.json +0 -8
- wisent/examples/scripts/1/test_careqa_en_evaluation.json +0 -30
- wisent/examples/scripts/1/test_careqa_en_pairs.json +0 -8
- wisent/examples/scripts/1/test_careqa_evaluation.json +0 -30
- wisent/examples/scripts/1/test_careqa_pairs.json +0 -8
- wisent/examples/scripts/1/test_catalanqa_evaluation.json +0 -30
- wisent/examples/scripts/1/test_catalanqa_pairs.json +0 -8
- wisent/examples/scripts/1/test_catcola_evaluation.json +0 -30
- wisent/examples/scripts/1/test_catcola_pairs.json +0 -8
- wisent/examples/scripts/1/test_chartqa_evaluation.json +0 -30
- wisent/examples/scripts/1/test_chartqa_pairs.json +0 -8
- wisent/examples/scripts/1/test_claim_stance_topic_evaluation.json +0 -30
- wisent/examples/scripts/1/test_claim_stance_topic_pairs.json +0 -8
- wisent/examples/scripts/1/test_cnn_dailymail_evaluation.json +0 -30
- wisent/examples/scripts/1/test_cnn_dailymail_pairs.json +0 -8
- wisent/examples/scripts/1/test_cocoteros_es_evaluation.json +0 -30
- wisent/examples/scripts/1/test_cocoteros_es_pairs.json +0 -8
- wisent/examples/scripts/1/test_coedit_gec_evaluation.json +0 -30
- wisent/examples/scripts/1/test_coedit_gec_pairs.json +0 -8
- wisent/examples/scripts/1/test_cola_evaluation.json +0 -30
- wisent/examples/scripts/1/test_cola_pairs.json +0 -8
- wisent/examples/scripts/1/test_coqcat_evaluation.json +0 -30
- wisent/examples/scripts/1/test_coqcat_pairs.json +0 -8
- wisent/examples/scripts/1/test_dbpedia_14_evaluation.json +0 -30
- wisent/examples/scripts/1/test_dbpedia_14_pairs.json +0 -8
- wisent/examples/scripts/1/test_epec_koref_bin_evaluation.json +0 -30
- wisent/examples/scripts/1/test_epec_koref_bin_pairs.json +0 -8
- wisent/examples/scripts/1/test_ethos_binary_evaluation.json +0 -30
- wisent/examples/scripts/1/test_ethos_binary_pairs.json +0 -8
- wisent/examples/scripts/2/test_afrimgsm_direct_amh_evaluation.json +0 -30
- wisent/examples/scripts/2/test_afrimgsm_direct_amh_pairs.json +0 -8
- wisent/examples/scripts/2/test_afrimmlu_direct_amh_evaluation.json +0 -30
- wisent/examples/scripts/2/test_afrimmlu_direct_amh_pairs.json +0 -8
- wisent/examples/scripts/2/test_afrixnli_en_direct_amh_evaluation.json +0 -30
- wisent/examples/scripts/2/test_afrixnli_en_direct_amh_pairs.json +0 -8
- wisent/examples/scripts/2/test_arc_ar_evaluation.json +0 -30
- wisent/examples/scripts/2/test_arc_ar_pairs.json +0 -8
- wisent/examples/scripts/2/test_atis_evaluation.json +0 -30
- wisent/examples/scripts/2/test_atis_pairs.json +0 -8
- wisent/examples/scripts/2/test_babi_evaluation.json +0 -30
- wisent/examples/scripts/2/test_babi_pairs.json +0 -8
- wisent/examples/scripts/2/test_babilong_evaluation.json +0 -30
- wisent/examples/scripts/2/test_babilong_pairs.json +0 -8
- wisent/examples/scripts/2/test_bangla_mmlu_evaluation.json +0 -30
- wisent/examples/scripts/2/test_bangla_mmlu_pairs.json +0 -8
- wisent/examples/scripts/2/test_basque-glue_pairs.json +0 -14
- {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/WHEEL +0 -0
- {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/entry_points.txt +0 -0
- {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/licenses/LICENSE +0 -0
- {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Discover unified directions for skill categories (coding, math, hallucination, etc.)
|
|
3
|
+
|
|
4
|
+
Uses GeometrySearchSpace to test all models, strategies, and layer combinations.
|
|
5
|
+
For each category, determines if a unified direction exists.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
# Run for all models (sequentially)
|
|
9
|
+
python -m wisent.examples.scripts.discover_directions
|
|
10
|
+
|
|
11
|
+
# Run for a specific model (for parallel execution)
|
|
12
|
+
python -m wisent.examples.scripts.discover_directions --model meta-llama/Llama-3.2-1B-Instruct
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import json
|
|
17
|
+
import subprocess
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Dict, List, Any, Optional
|
|
20
|
+
from dataclasses import dataclass, field, asdict
|
|
21
|
+
|
|
22
|
+
S3_BUCKET = "wisent-bucket"
|
|
23
|
+
S3_PREFIX = "direction_discovery"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def s3_sync_download(model_name: str, output_dir: Path) -> None:
    """Best-effort download of previously stored results from S3.

    Runs ``aws s3 sync`` from ``s3://{S3_BUCKET}/{S3_PREFIX}/<model_prefix>/``
    into *output_dir*. Any failure is reported on stdout and never raised,
    so the caller can continue without previously stored results.

    Args:
        model_name: Model identifier; every ``/`` is replaced with ``_`` to
            build the S3 key prefix.
        output_dir: Local directory passed to ``aws s3 sync`` as destination.
    """
    model_prefix = model_name.replace('/', '_')
    s3_path = f"s3://{S3_BUCKET}/{S3_PREFIX}/{model_prefix}/"
    try:
        completed = subprocess.run(
            ["aws", "s3", "sync", s3_path, str(output_dir), "--quiet"],
            check=False,
            capture_output=True,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # Raised when the `aws` executable itself could not be launched.
        print(f"S3 download skipped: {e}")
        return

    if completed.returncode != 0:
        # check=False means a failed sync does not raise; report it here
        # instead of claiming the results were synced.
        detail = completed.stderr.decode("utf-8", errors="replace").strip()
        reason = f"aws s3 sync exited with code {completed.returncode}"
        if detail:
            reason = f"{reason}: {detail}"
        print(f"S3 download skipped: {reason}")
        return

    print(f"Synced existing results from S3: {s3_path}")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def s3_upload_file(local_path: Path, model_name: str) -> None:
    """Best-effort upload of a single file to S3.

    Copies *local_path* with ``aws s3 cp`` to
    ``s3://{S3_BUCKET}/{S3_PREFIX}/<model_prefix>/<file name>``. Failures are
    reported on stdout and never raised.

    Args:
        local_path: File to upload; only its ``name`` is used for the S3 key.
        model_name: Model identifier; every ``/`` is replaced with ``_`` to
            build the S3 key prefix.
    """
    model_prefix = model_name.replace('/', '_')
    s3_path = f"s3://{S3_BUCKET}/{S3_PREFIX}/{model_prefix}/{local_path.name}"
    try:
        subprocess.run(
            ["aws", "s3", "cp", str(local_path), s3_path, "--quiet"],
            check=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as e:
        # capture_output=True hides the CLI's stderr, so surface it here;
        # the exception text alone only carries the exit status.
        detail = (e.stderr or b"").decode("utf-8", errors="replace").strip()
        suffix = f" ({detail})" if detail else ""
        print(f" S3 upload failed: {e}{suffix}")
    except (OSError, subprocess.SubprocessError) as e:
        # Raised when the `aws` executable itself could not be launched.
        print(f" S3 upload failed: {e}")
    else:
        print(f" Uploaded to S3: {s3_path}")
|
|
54
|
+
|
|
55
|
+
from wisent.core.geometry_search_space import (
|
|
56
|
+
GeometrySearchSpace,
|
|
57
|
+
GeometrySearchConfig,
|
|
58
|
+
)
|
|
59
|
+
from wisent.core.geometry_runner import (
|
|
60
|
+
GeometryRunner,
|
|
61
|
+
GeometrySearchResults,
|
|
62
|
+
GeometryTestResult,
|
|
63
|
+
)
|
|
64
|
+
from wisent.core.contrastive_pairs.diagnostics.control_vectors import (
|
|
65
|
+
GeometryAnalysisConfig,
|
|
66
|
+
StructureType,
|
|
67
|
+
)
|
|
68
|
+
from wisent.core.models.wisent_model import WisentModel
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def load_categorized_benchmarks() -> Dict[str, List[str]]:
    """Return the parsed contents of ``working_benchmarks_categorized.json``.

    The file is looked up in ``parameters/lm_eval`` relative to the
    great-grandparent directory of this source file.
    """
    json_path = (
        Path(__file__).parent.parent.parent
        / "parameters"
        / "lm_eval"
        / "working_benchmarks_categorized.json"
    )
    with open(json_path) as handle:
        benchmarks_by_category = json.load(handle)
    return benchmarks_by_category
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def load_category_directions() -> Dict[str, Dict]:
    """Return the parsed contents of ``category_directions.json``.

    The file is looked up in ``parameters/lm_eval`` relative to the
    great-grandparent directory of this source file.
    """
    json_path = (
        Path(__file__).parent.parent.parent
        / "parameters"
        / "lm_eval"
        / "category_directions.json"
    )
    with open(json_path) as handle:
        directions_by_category = json.load(handle)
    return directions_by_category
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class CategoryResult:
    """Aggregated geometry-search outcome for one benchmark category."""

    # --- Identification ---
    category: str
    description: str
    benchmarks_tested: List[str]
    total_tests: int

    # --- Step 1: is there any signal? ---
    avg_signal_strength: float  # mean MLP CV accuracy
    signal_exists: bool  # True when avg_signal_strength > 0.6

    # --- Step 2: is the signal linear? ---
    avg_linear_probe_accuracy: float  # mean linear-probe CV accuracy
    is_linear: bool  # linear signal, i.e. CAA is expected to work

    # --- Nonlinear signal metrics ---
    avg_knn_accuracy_k10: float  # mean k-NN CV accuracy
    avg_mmd_rbf: float  # mean Maximum Mean Discrepancy
    avg_local_dim_pos: float  # mean local intrinsic dimension, positive class
    avg_local_dim_neg: float  # mean local intrinsic dimension, negative class
    avg_fisher_max: float  # mean of the max Fisher ratio
    avg_density_ratio: float  # mean density ratio

    # --- Step 3: geometry details (only meaningful when signal_exists) ---
    structure_distribution: Dict[str, int]
    structure_percentages: Dict[str, float]
    dominant_structure: str
    avg_linear_score: float
    avg_cohens_d: float

    # --- Final verdict ---
    recommendation: str  # NO_SIGNAL, CAA, NONLINEAR, or NO_RESULTS when nothing ran
    has_unified_direction: bool
    best_config: Optional[Dict[str, Any]] = None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass
class DiscoveryResults:
    """Category results collected for one model, plus a text report."""
    model: str
    categories: Dict[str, CategoryResult] = field(default_factory=dict)

    def summary(self) -> str:
        """Build a plain-text report with categories grouped by outcome.

        Groups are emitted in the order CAA-ready, nonlinear, no-signal, and a
        group is omitted when empty. Only the CAA-ready group is sorted (by
        descending average signal strength); the others keep insertion order.
        """
        report = [
            f"Model: {self.model}",
            f"Categories analyzed: {len(self.categories)}",
            "",
        ]

        # Partition category names: no signal / signal + linear / signal only.
        no_signal = [n for n, c in self.categories.items() if not c.signal_exists]
        with_signal = [n for n, c in self.categories.items() if c.signal_exists]
        caa_ready = [n for n in with_signal if self.categories[n].is_linear]
        nonlinear = [n for n in with_signal if not self.categories[n].is_linear]

        if caa_ready:
            report.append(f"CAA READY - Linear signal ({len(caa_ready)}):")
            strongest_first = sorted(
                caa_ready,
                key=lambda n: self.categories[n].avg_signal_strength,
                reverse=True,
            )
            for name in strongest_first:
                cat = self.categories[name]
                report.append(f" {name}: signal={cat.avg_signal_strength:.2f}, linear={cat.avg_linear_probe_accuracy:.2f}, kNN={cat.avg_knn_accuracy_k10:.2f}")

        if nonlinear:
            report.append(f"\nNONLINEAR - Need different method ({len(nonlinear)}):")
            for name in nonlinear:
                cat = self.categories[name]
                report.append(f" {name}: signal={cat.avg_signal_strength:.2f}, linear={cat.avg_linear_probe_accuracy:.2f}, kNN={cat.avg_knn_accuracy_k10:.2f}, MMD={cat.avg_mmd_rbf:.3f}")

        if no_signal:
            report.append(f"\nNO SIGNAL ({len(no_signal)}):")
            for name in no_signal:
                cat = self.categories[name]
                report.append(f" {name}: signal={cat.avg_signal_strength:.2f}, kNN={cat.avg_knn_accuracy_k10:.2f}")

        return "\n".join(report)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def analyze_category_results(results: GeometrySearchResults, category: str, description: str, benchmarks: List[str]) -> CategoryResult:
    """Condense all geometry test results of one category into a CategoryResult.

    Every metric is the arithmetic mean over ``results.results``. A signal is
    considered present when the mean signal strength exceeds 0.6; it is
    considered linear when, in addition, the mean linear-probe accuracy exceeds
    0.6 and trails the mean signal strength by less than 0.15. When there are
    no results at all, a placeholder with recommendation "NO_RESULTS" is
    returned.
    """
    runs = results.results
    if not runs:
        # Nothing was measured: chance-level accuracies and neutral metrics.
        return CategoryResult(
            category=category,
            description=description,
            benchmarks_tested=benchmarks,
            total_tests=0,
            avg_signal_strength=0.5,
            signal_exists=False,
            avg_linear_probe_accuracy=0.5,
            is_linear=False,
            avg_knn_accuracy_k10=0.5,
            avg_mmd_rbf=0.0,
            avg_local_dim_pos=0.0,
            avg_local_dim_neg=0.0,
            avg_fisher_max=0.0,
            avg_density_ratio=1.0,
            structure_distribution={},
            structure_percentages={},
            dominant_structure="error",
            avg_linear_score=0.0,
            avg_cohens_d=0.0,
            recommendation="NO_RESULTS",
            has_unified_direction=False,
        )

    def average(attribute: str) -> float:
        # Mean of one per-result attribute across all runs.
        return sum(getattr(run, attribute) for run in runs) / len(runs)

    # Structure histogram, its percentage view, and the most frequent label.
    dist = results.get_structure_distribution()
    total = sum(dist.values())
    if total > 0:
        percentages = {label: 100 * count / total for label, count in dist.items()}
    else:
        percentages = {}
    dominant = max(dist, key=dist.get) if dist else "unknown"

    # Step 1: signal detection (MLP CV accuracy).
    avg_signal_strength = average("signal_strength")
    signal_exists = avg_signal_strength > 0.6

    # Step 2: linearity - the linear probe must itself be above threshold and
    # close to what the MLP achieves.
    avg_linear_probe_accuracy = average("linear_probe_accuracy")
    is_linear = (
        signal_exists
        and avg_linear_probe_accuracy > 0.6
        and (avg_signal_strength - avg_linear_probe_accuracy) < 0.15
    )

    # Step 2b: nonlinear signal metrics.
    avg_knn_accuracy_k10 = average("knn_accuracy_k10")
    avg_mmd_rbf = average("mmd_rbf")
    avg_local_dim_pos = average("local_dim_pos")
    avg_local_dim_neg = average("local_dim_neg")
    avg_fisher_max = average("fisher_max")
    avg_density_ratio = average("density_ratio")

    # Step 3: geometry details.
    avg_linear_score = average("linear_score")
    avg_cohens_d = average("cohens_d")

    # Final recommendation.
    recommendation = "NO_SIGNAL" if not signal_exists else ("CAA" if is_linear else "NONLINEAR")

    # Best configuration: the run with the highest signal strength (the list
    # is non-empty here, so there is always one).
    top = sorted(runs, key=lambda r: r.signal_strength, reverse=True)[0]
    best_config = {
        "benchmark": top.benchmark,
        "strategy": top.strategy,
        "layers": top.layers,
        "signal_strength": top.signal_strength,
        "linear_probe_accuracy": top.linear_probe_accuracy,
        "is_linear": top.is_linear,
    }

    return CategoryResult(
        category=category,
        description=description,
        benchmarks_tested=benchmarks,
        total_tests=total,
        avg_signal_strength=avg_signal_strength,
        signal_exists=signal_exists,
        avg_linear_probe_accuracy=avg_linear_probe_accuracy,
        is_linear=is_linear,
        avg_knn_accuracy_k10=avg_knn_accuracy_k10,
        avg_mmd_rbf=avg_mmd_rbf,
        avg_local_dim_pos=avg_local_dim_pos,
        avg_local_dim_neg=avg_local_dim_neg,
        avg_fisher_max=avg_fisher_max,
        avg_density_ratio=avg_density_ratio,
        structure_distribution=dist,
        structure_percentages=percentages,
        dominant_structure=dominant,
        avg_linear_score=avg_linear_score,
        avg_cohens_d=avg_cohens_d,
        # A unified direction is reported exactly when the signal is linear.
        recommendation=recommendation,
        has_unified_direction=is_linear,
        best_config=best_config,
    )
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def run_discovery_for_model(model_name: str, output_dir: Path, pairs_per_benchmark: Optional[int] = None):
    """Run direction discovery for one model, skipping categories already done.

    Steps: sync earlier results from S3, treat every category whose result file
    already exists (and is larger than 100 bytes) as completed, load the model
    on CUDA, run the geometry search for each remaining category and
    save/upload its results right away, then merge the new category summaries
    into ``<model>_summary.json`` and upload that file.

    Args:
        model_name: Model identifier passed to WisentModel; ``/`` is replaced
            by ``_`` in file names.
        output_dir: Directory holding per-category result files and the summary.
        pairs_per_benchmark: Number of pairs per benchmark for the search.
            When None, the value from a default-constructed
            GeometrySearchSpace is used.

    Returns:
        DiscoveryResults for the categories processed in this call, or None
        when every category was already complete or the model failed to load.
    """
    categories = load_categorized_benchmarks()
    category_info = load_category_directions()
    search_space = GeometrySearchSpace()
    if pairs_per_benchmark is None:
        pairs_per_benchmark = search_space.config.pairs_per_benchmark

    print(f"\n{'=' * 70}")
    print(f"MODEL: {model_name}")
    print("=" * 70)

    # Download existing results from S3 for resume
    s3_sync_download(model_name, output_dir)

    # Check which categories are already done
    model_prefix = model_name.replace('/', '_')
    completed_categories = set()
    for cat_name in categories:
        cat_file = output_dir / f"{model_prefix}_{cat_name}.json"
        # The size check rejects empty or near-empty leftover files.
        if cat_file.exists() and cat_file.stat().st_size > 100:
            completed_categories.add(cat_name)
            print(f" [SKIP] {cat_name} already completed")

    remaining = [c for c in categories if c not in completed_categories]
    if not remaining:
        print("All categories already completed!")
        return None

    print(f"\nCompleted: {len(completed_categories)}/{len(categories)}, Remaining: {len(remaining)}")
    print(f"Categories to run: {remaining}")

    try:
        model = WisentModel(model_name, device="cuda")
        print(f"Loaded: {model.num_layers} layers, hidden={model.hidden_size}")
    except Exception as e:
        print(f"Failed to load model: {e}")
        return None

    cache_dir = f"/tmp/wisent_direction_cache_{model_prefix}"

    model_results = DiscoveryResults(model=model_name)

    # Run for each remaining category
    for cat_name in remaining:
        benchmarks = categories[cat_name]
        print(f"\n{'-' * 50}")
        print(f"Category: {cat_name.upper()} ({len(benchmarks)} benchmarks)")
        print("-" * 50)

        info = category_info.get(cat_name, {})
        description = info.get("description", "")
        print(f"Description: {description}")

        # Create search space for this category
        cat_config = GeometrySearchConfig(
            pairs_per_benchmark=pairs_per_benchmark,
            max_layer_combo_size=search_space.config.max_layer_combo_size,
            cache_dir=cache_dir,
        )

        cat_space = GeometrySearchSpace(
            models=[model_name],
            strategies=search_space.strategies,
            benchmarks=benchmarks,
            config=cat_config,
        )

        # Run geometry search
        runner = GeometryRunner(cat_space, model, cache_dir=cache_dir)

        try:
            results = runner.run(show_progress=True)
            cat_result = analyze_category_results(results, cat_name, description, benchmarks)
            model_results.categories[cat_name] = cat_result

            print(f"\n Step 1 - Signal: {cat_result.avg_signal_strength:.3f} ({'EXISTS' if cat_result.signal_exists else 'NONE'})")
            print(f" Step 2 - Linear: {cat_result.avg_linear_probe_accuracy:.3f} ({'YES' if cat_result.is_linear else 'NO'})")
            print(f" Recommendation: {cat_result.recommendation}")

            # Save per-category results immediately
            cat_file = output_dir / f"{model_prefix}_{cat_name}.json"
            results.save(str(cat_file))
            print(f" Saved: {cat_file}")

            # Upload to S3 immediately for durability
            s3_upload_file(cat_file, model_name)

        except Exception as e:
            # One failing category must not abort the remaining ones.
            print(f" ERROR: {e}")
            continue

    # Save/update model summary (merge with existing if any)
    summary_file = output_dir / f"{model_prefix}_summary.json"

    # Load existing summary if present. An unreadable file (for example one
    # truncated by an interrupted run) is treated as empty so the results
    # computed above are still written out instead of being lost to a crash.
    existing_categories = {}
    if summary_file.exists():
        try:
            with open(summary_file) as f:
                existing_categories = json.load(f).get("categories", {})
        except (OSError, ValueError, AttributeError) as e:
            print(f" Could not read existing summary ({e}); rewriting it")
            existing_categories = {}

    # Merge new results (new entries win over existing ones with the same key)
    all_categories = {**existing_categories, **{k: asdict(v) for k, v in model_results.categories.items()}}

    with open(summary_file, "w") as f:
        json.dump({
            "model": model_name,
            "categories": all_categories
        }, f, indent=2)

    # Upload summary to S3
    s3_upload_file(summary_file, model_name)

    print(f"\n{model_results.summary()}")

    # Drop the local reference to the model
    del model

    return model_results


def run_discovery(model_filter: Optional[str] = None, samples_per_benchmark: int = 50):
    """Run category direction discovery for one model or for all default models.

    Args:
        model_filter: When given, only this model is processed and no overall
            summary file is written; otherwise every model of the default
            GeometrySearchSpace is processed.
        samples_per_benchmark: Number of pairs per benchmark, forwarded to
            each per-model run.
    """
    print("=" * 70)
    print("CATEGORY DIRECTION DISCOVERY")
    print("=" * 70)

    # Load categories
    categories = load_categorized_benchmarks()

    print(f"Categories: {list(categories.keys())}")
    print(f"Total benchmarks: {sum(len(b) for b in categories.values())}")

    # Get search space config (used here for the model list and the banner)
    search_space = GeometrySearchSpace()
    search_space.config.pairs_per_benchmark = samples_per_benchmark

    # Filter models if specified
    if model_filter:
        models_to_test = [model_filter]
    else:
        models_to_test = search_space.models

    print(f"\nModels to test: {models_to_test}")
    print(f"Strategies: {[s.value for s in search_space.strategies]}")
    print(f"Pairs per benchmark: {search_space.config.pairs_per_benchmark}")

    # Output directory
    output_dir = Path("/tmp/direction_discovery")
    output_dir.mkdir(parents=True, exist_ok=True)

    all_model_results = {}

    # Run for each model. The sample count is passed explicitly because the
    # per-model run builds its own search space and would otherwise fall back
    # to that object's default.
    for model_name in models_to_test:
        model_results = run_discovery_for_model(
            model_name, output_dir, pairs_per_benchmark=samples_per_benchmark
        )
        if model_results:
            all_model_results[model_name] = model_results

    # Save overall summary (only if running all models)
    if not model_filter and all_model_results:
        overall_file = output_dir / "discovery_summary.json"
        overall = {
            "models": list(all_model_results.keys()),
            "categories": list(categories.keys()),
            "results": {}
        }
        for model_name, results in all_model_results.items():
            overall["results"][model_name] = {
                cat: {
                    "has_unified_direction": r.has_unified_direction,
                    "dominant_structure": r.dominant_structure,
                    "recommendation": r.recommendation,
                    "avg_linear_score": r.avg_linear_score,
                }
                for cat, r in results.categories.items()
            }

        with open(overall_file, "w") as f:
            json.dump(overall, f, indent=2)

    print(f"\n{'=' * 70}")
    print("DISCOVERY COMPLETE")
    print("=" * 70)
    print(f"Results saved to: {output_dir}")
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
if __name__ == "__main__":
    # Command-line entry point: parse the two options and start the discovery.
    parser = argparse.ArgumentParser(
        description="Discover unified directions for skill categories",
    )
    parser.add_argument(
        "--model",
        type=str,
        default=None,
        help="Specific model to test (for parallel execution)",
    )
    parser.add_argument(
        "--samples-per-benchmark",
        type=int,
        default=50,
        help="Number of samples per benchmark (default: 50)",
    )
    args = parser.parse_args()

    run_discovery(
        model_filter=args.model,
        samples_per_benchmark=args.samples_per_benchmark,
    )
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Extract benchmark information from README files."""
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
def extract_info_from_readme(readme_path):
    """Extract name, description, paper link and homepage link from a README.

    Args:
        readme_path: ``pathlib.Path`` of the README file to read.

    Returns:
        dict with the keys ``name``, ``description``, ``paper`` and
        ``homepage``. ``name`` falls back to the file stem when the README has
        no ``# `` heading; the other values are empty strings when nothing
        matching is found. ``paper`` and ``homepage`` contain only the URL.
    """
    # READMEs are read as UTF-8 regardless of the platform default; undecodable
    # bytes are replaced so a single odd file does not abort the extraction.
    content = readme_path.read_text(encoding="utf-8", errors="replace")

    info = {
        "name": readme_path.stem,
        "description": "",
        "paper": "",
        "homepage": ""
    }

    # Extract title (first # heading)
    title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
    if title_match:
        info["name"] = title_match.group(1).strip()

    # Extract paper link: the first URL on a line after "Paper" or "Abstract".
    # Only the captured URL is stored, not the label text preceding it.
    paper_match = re.search(r'(?:Paper|Abstract).*?(https?://[^\s\)]+)', content, re.IGNORECASE)
    if paper_match:
        info["paper"] = paper_match.group(1)

    # Extract homepage link the same way, keyed on "Homepage".
    homepage_match = re.search(r'Homepage.*?(https?://[^\s\)]+)', content, re.IGNORECASE)
    if homepage_match:
        info["homepage"] = homepage_match.group(1)

    # Extract description: text following "Abstract" up to the next blank line
    # or heading.
    desc_match = re.search(r'(?:Abstract|##\s*Abstract)[:\s]*(.+?)(?:\n\n|\n#)', content, re.DOTALL | re.IGNORECASE)
    if desc_match:
        desc = desc_match.group(1).strip()
        # Collapse runs of whitespace and cap the length at 500 characters
        desc = re.sub(r'\s+', ' ', desc)
        info["description"] = desc[:500]
    else:
        # Fall back to the first paragraph longer than 50 characters
        paragraphs = [p.strip() for p in content.split('\n\n') if len(p.strip()) > 50]
        if paragraphs:
            info["description"] = paragraphs[0][:500]

    return info
|
|
49
|
+
|
|
50
|
+
def main():
    """Write ``results/benchmark_descriptions.json`` from ``readmes/*.md``.

    Both directories are resolved next to this script. Each README is parsed
    with extract_info_from_readme and stored under its file stem.
    """
    script_dir = Path(__file__).parent
    readmes_dir = script_dir / "readmes"
    output_file = script_dir / "results" / "benchmark_descriptions.json"

    output_file.parent.mkdir(exist_ok=True)

    all_info = {}

    # Sorted so the processing order (and the JSON key order) is deterministic.
    for readme_path in sorted(readmes_dir.glob("*.md")):
        benchmark_name = readme_path.stem
        all_info[benchmark_name] = extract_info_from_readme(readme_path)
        print(f"Processed {benchmark_name}")

    with open(output_file, 'w') as out:
        json.dump(all_info, out, indent=2)

    print(f"\nExtracted info for {len(all_info)} benchmarks")
    print(f"Output: {output_file}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Search for all short task names that might match Tag."""
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
sys.path.insert(0, '/Users/lukaszbartoszcze/Documents/CodingProjects/Wisent/backends/wisent-open-source')
|
|
6
|
+
|
|
7
|
+
from lm_eval.tasks import TaskManager
|
|
8
|
+
|
|
9
|
+
def main():
    """Print lm-eval task names that might correspond to a task called "Tag".

    Three listings are printed: names of exactly three characters, names of
    three or four characters starting with "t" (case-insensitive), and the
    first 20 names (sorted) that contain the letters t, a and g anywhere.
    """
    tm = TaskManager()
    task_names = list(tm.task_index.keys())

    def show(names):
        # One indented bullet per task name.
        for task in names:
            print(f" - {task}")

    # Names that are exactly three characters long
    three_letter = [t for t in task_names if len(t) == 3]
    print(f"Found {len(three_letter)} tasks with exactly 3 letters:")
    show(sorted(three_letter))

    # Names of 3-4 characters whose first letter is "t" (any case)
    short_t = [t for t in task_names if t.lower().startswith('t') and 3 <= len(t) <= 4]
    print(f"\nFound {len(short_t)} tasks with 3-4 letters starting with 't':")
    show(sorted(short_t))

    # Names containing each of t, a and g somewhere (any order, any case)
    tag_pattern = [t for t in task_names if all(letter in t.lower() for letter in ('t', 'a', 'g'))]
    print(f"\nFound {len(tag_pattern)} tasks containing t, a, and g:")
    show(sorted(tag_pattern)[:20])  # Show first 20


if __name__ == "__main__":
    main()
|