wisent 0.7.379__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wisent/__init__.py +64 -0
- wisent/cli.py +114 -0
- wisent/core/__init__.py +40 -0
- wisent/core/activations/__init__.py +26 -0
- wisent/core/activations/activations.py +97 -0
- wisent/core/activations/activations_collector.py +506 -0
- wisent/core/activations/core/__init__.py +0 -0
- wisent/core/activations/core/atoms.py +219 -0
- wisent/core/activations/prompt_construction_strategy.py +47 -0
- wisent/core/adapters/__init__.py +22 -0
- wisent/core/adapters/audio.py +616 -0
- wisent/core/adapters/base.py +420 -0
- wisent/core/adapters/multimodal.py +738 -0
- wisent/core/adapters/robotics.py +643 -0
- wisent/core/adapters/text.py +441 -0
- wisent/core/adapters/video.py +555 -0
- wisent/core/agent/__init__.py +1 -0
- wisent/core/agent/budget.py +644 -0
- wisent/core/agent/device_benchmarks.py +691 -0
- wisent/core/agent/diagnose/__init__.py +1 -0
- wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
- wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
- wisent/core/agent/diagnose/create_classifier.py +1155 -0
- wisent/core/agent/diagnose/response_diagnostics.py +273 -0
- wisent/core/agent/diagnose/select_classifiers.py +507 -0
- wisent/core/agent/diagnose/synthetic_classifier_option.py +755 -0
- wisent/core/agent/diagnose/tasks/__init__.py +33 -0
- wisent/core/agent/diagnose/tasks/task_manager.py +1453 -0
- wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
- wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
- wisent/core/agent/diagnose.py +249 -0
- wisent/core/agent/steer.py +215 -0
- wisent/core/agent/timeout.py +134 -0
- wisent/core/autonomous_agent.py +1158 -0
- wisent/core/benchmark_extractors.py +372 -0
- wisent/core/benchmark_registry.py +151 -0
- wisent/core/bigcode_extractors.py +26 -0
- wisent/core/bigcode_integration.py +886 -0
- wisent/core/branding.py +108 -0
- wisent/core/classifier/__init__.py +1 -0
- wisent/core/classifier/models/__init__.py +1 -0
- wisent/core/classifiers/__init__.py +1 -0
- wisent/core/classifiers/classifiers/__init__.py +0 -0
- wisent/core/classifiers/classifiers/core/__init__.py +0 -0
- wisent/core/classifiers/classifiers/core/atoms.py +748 -0
- wisent/core/classifiers/classifiers/models/__init__.py +0 -0
- wisent/core/classifiers/classifiers/models/logistic.py +29 -0
- wisent/core/classifiers/classifiers/models/mlp.py +47 -0
- wisent/core/classifiers/classifiers/rotator.py +137 -0
- wisent/core/classifiers/core/__init__.py +1 -0
- wisent/core/classifiers/models/__init__.py +1 -0
- wisent/core/classifiers/pipeline_steps/__init__.py +1 -0
- wisent/core/cli/__init__.py +26 -0
- wisent/core/cli/agent/__init__.py +15 -0
- wisent/core/cli/agent/apply_steering.py +192 -0
- wisent/core/cli/agent/evaluate_response.py +128 -0
- wisent/core/cli/agent/generate_synthetic_pairs.py +123 -0
- wisent/core/cli/agent/main.py +139 -0
- wisent/core/cli/agent/train_classifier.py +173 -0
- wisent/core/cli/check_linearity.py +126 -0
- wisent/core/cli/create_steering_vector.py +304 -0
- wisent/core/cli/diagnose_pairs.py +153 -0
- wisent/core/cli/diagnose_vectors.py +404 -0
- wisent/core/cli/estimate_unified_goodness_time.py +428 -0
- wisent/core/cli/evaluate_refusal.py +241 -0
- wisent/core/cli/evaluate_responses.py +926 -0
- wisent/core/cli/generate_humanization_pairs.py +128 -0
- wisent/core/cli/generate_pairs.py +175 -0
- wisent/core/cli/generate_pairs_from_task.py +108 -0
- wisent/core/cli/generate_responses.py +160 -0
- wisent/core/cli/generate_vector_from_synthetic.py +217 -0
- wisent/core/cli/generate_vector_from_task.py +248 -0
- wisent/core/cli/get_activations.py +192 -0
- wisent/core/cli/inference_config.py +84 -0
- wisent/core/cli/inference_config_cli.py +54 -0
- wisent/core/cli/modify_weights.py +660 -0
- wisent/core/cli/multi_steer.py +112 -0
- wisent/core/cli/optimization_cache.py +298 -0
- wisent/core/cli/optimize.py +621 -0
- wisent/core/cli/optimize_classification.py +473 -0
- wisent/core/cli/optimize_sample_size.py +390 -0
- wisent/core/cli/optimize_steering.py +3421 -0
- wisent/core/cli/optimize_weights.py +1287 -0
- wisent/core/cli/steering_method_trainer.py +641 -0
- wisent/core/cli/steering_search_space.py +508 -0
- wisent/core/cli/tasks.py +940 -0
- wisent/core/cli/train_unified_goodness.py +681 -0
- wisent/core/cli_logger.py +22 -0
- wisent/core/config_manager.py +1731 -0
- wisent/core/contrastive_pairs/__init__.py +15 -0
- wisent/core/contrastive_pairs/core/__init__.py +0 -0
- wisent/core/contrastive_pairs/core/atoms.py +45 -0
- wisent/core/contrastive_pairs/core/buliders.py +59 -0
- wisent/core/contrastive_pairs/core/pair.py +183 -0
- wisent/core/contrastive_pairs/core/response.py +153 -0
- wisent/core/contrastive_pairs/core/serialization.py +306 -0
- wisent/core/contrastive_pairs/core/set.py +192 -0
- wisent/core/contrastive_pairs/diagnostics/__init__.py +79 -0
- wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
- wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
- wisent/core/contrastive_pairs/diagnostics/control_vectors.py +1655 -0
- wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
- wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
- wisent/core/contrastive_pairs/diagnostics/duplicates.py +118 -0
- wisent/core/contrastive_pairs/diagnostics/linearity.py +325 -0
- wisent/core/contrastive_pairs/diagnostics/vector_quality.py +620 -0
- wisent/core/contrastive_pairs/huggingface_pairs/__init__.py +1 -0
- wisent/core/contrastive_pairs/huggingface_pairs/atoms.py +255 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_extractor_manifest.py +470 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_extractor_registry.py +136 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/__init__.py +44 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentbench.py +225 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentharm.py +267 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentic_search.py +444 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aider_polyglot.py +225 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aime.py +118 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aime2024.py +74 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aime2025.py +73 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/alpaca_eval.py +153 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/apps.py +182 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/arena_hard.py +179 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/atis.py +89 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/babilong.py +96 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/bangla_mmlu.py +108 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/basqueglue.py +217 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/bec2016eu.py +99 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/bfcl.py +283 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/bhtc_v2.py +87 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/browsecomp.py +245 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/chain_of_thought.py +89 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/chinese_simpleqa.py +209 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/cluewsc.py +177 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/cnn_dailymail.py +92 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codeforces.py +378 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue.py +109 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text.py +15 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_go.py +64 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_java.py +65 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_javascript.py +65 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_php.py +65 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_python.py +65 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_ruby.py +65 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/coding_benchmarks.py +844 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/coedit_gec.py +79 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/conala.py +133 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/concode.py +111 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/dbpedia_14.py +91 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/doc_vqa.py +102 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/donotanswer.py +236 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/ds1000.py +129 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/ds_1000.py +155 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/epec_koref_bin.py +85 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/ethos_binary.py +82 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/evalita_mp.py +165 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/evalita_sp_sum_task_fp_small_p1.py +89 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/facts_grounding.py +181 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/faithbench.py +295 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/financial_tweets.py +100 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flames.py +270 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flan_held_in.py +98 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flores.py +572 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/frames.py +143 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/freebase.py +99 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/get_negative_example_livecodebench.py +146 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/get_positive_example_livecodebench.py +140 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/gpt3_translation_benchmarks.py +98 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hallucinations_leaderboard.py +389 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/halueval.py +246 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/harmbench.py +250 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/healthbench.py +181 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hle.py +106 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hmmt.py +117 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/humaneval.py +119 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/humanevalpack.py +102 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/instruct_humaneval.py +180 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/instructhumaneval.py +129 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/iwslt2017_ar_en.py +98 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/iwslt2017_en_ar.py +98 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/jailbreakbench.py +258 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/law_stack_exchange.py +101 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/ledgar.py +118 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livecodebench.py +61 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livecodebench_contrastive_pair_generator.py +491 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livecodebench_v6.py +263 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livemathbench.py +230 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/llama.py +96 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/longform_writing.py +285 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/m_mmlu.py +96 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/math.py +186 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/math500.py +146 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mbpp.py +142 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/meddialog.py +79 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/medical_abstracts.py +101 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/medium_priority_benchmarks.py +787 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mercury.py +111 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mmlu_redux.py +194 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mmlusr.py +108 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multimedqa.py +99 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multipl_e.py +109 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple.py +96 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_choice.py +87 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_cpp.py +128 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_go.py +128 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_java.py +128 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_js.py +128 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_py.py +15 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_rs.py +128 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/non_greedy_robustness_agieval_aqua_rat.py +92 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/olympiadbench.py +287 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/openllm.py +99 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/option_order_robustness_agieval_aqua_rat.py +92 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/or_bench.py +300 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/penn_treebank.py +80 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/planbench.py +317 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/polymath.py +467 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/prompt_robustness_agieval_aqua_rat.py +92 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/pythia.py +99 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/recode.py +131 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/refusalbench.py +280 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/scicode.py +275 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/self_consistency.py +90 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/simpleqa.py +145 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/sorry_bench.py +211 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/stsb.py +79 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/super_glue_lm_eval_v1.py +99 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/super_glue_lm_eval_v1_seq2seq.py +98 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/super_glue_t5_prompt.py +123 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/super_gpqa.py +106 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/swe_bench.py +428 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/swe_bench_verified.py +158 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/sycophancy_eval.py +205 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/t0_eval.py +79 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tag.py +98 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tau_bench.py +305 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tmlu.py +109 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolbench.py +360 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolemu.py +386 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/travelplanner.py +286 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/truthfulqa_generation.py +128 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/unfair_tos.py +83 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/vaxx_stance.py +86 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wiceu.py +85 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wikitext103.py +97 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wildguard.py +280 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt14_en_fr.py +97 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt14_fr_en.py +97 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt16_de_en.py +90 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt16_en_de.py +90 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt16_en_ro.py +90 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt16_ro_en.py +90 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt_ro_en_t5_prompt.py +90 -0
- wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/xsum.py +81 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +265 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/__init__.py +472 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/aclue.py +24 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/acp.py +33 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/acpbench.py +39 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/advanced_ai_risk.py +59 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/aexams.py +14 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrimgsm.py +10 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrimmlu.py +10 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrixnli.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench.py +14 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_adr.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_afriqa.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_afrisenti.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_belebele.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_flores.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_injongointent.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_mafand.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_masakhaner.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_masakhanews.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_masakhapos.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_naijarc.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_nollysenti.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_ntrex.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_openai_mmlu.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_salt.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_sib.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_uhura_arc_easy.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_xlsum.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/agieval.py +33 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/anli.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arab_culture.py +24 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabic_leaderboard_acva.py +67 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabic_leaderboard_acva_light.py +67 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabic_leaderboard_complete.py +24 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabic_leaderboard_light.py +81 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabicmmlu.py +59 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/aradice.py +36 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arc.py +61 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arithmetic.py +19 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/basque_bench.py +37 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/bbh.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/bbq.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/belebele.py +293 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/bertaqa.py +25 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/bigbench.py +300 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/blimp.py +76 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/careqa.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/catalan_bench.py +43 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/ceval_valid.py +61 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/cmmlu.py +76 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/code_x_glue.py +16 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/copal_id.py +11 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/crows_pairs.py +31 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/csatqa.py +15 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/darija.py +29 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/darijammlu.py +57 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/egymmlu.py +62 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/eus.py +76 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/evalita_mp.py +93 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/fld.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/flores.py +466 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/freebase.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/french_bench.py +23 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/galician_bench.py +41 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/glianorex.py +11 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/global_mmlu.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/gpqa.py +27 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/gsm8k.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/gsm8k_platinum.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/haerae.py +14 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/headqa.py +11 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/hellaswag.py +39 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/hendrycks_ethics.py +14 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/hendrycks_math.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/hrm8k.py +20 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/inverse.py +22 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/japanese_leaderboard.py +20 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/jsonschema_bench.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/kbl.py +85 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/kmmlu.py +281 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/kobest.py +14 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/kormedmcqa.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/lambada.py +28 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/leaderboard.py +52 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/libra.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/lingoly.py +11 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/longbench.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/m.py +43 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mastermind.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mathqa.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/med.py +24 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/meddialog.py +12 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/medqa.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mela.py +18 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/metabench.py +36 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mgsm.py +44 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/minerva_math.py +16 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mlqa.py +58 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlu.py +70 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlu_pro.py +23 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlu_pro_plus.py +23 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlu_prox.py +191 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlusr.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmmu.py +46 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/model_written_evals.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/multiblimp.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/non.py +23 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/noreval.py +143 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/noridiom.py +20 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/nortruthfulqa.py +32 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/nrk.py +20 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi_arc_multilingual.py +10 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi_hellaswag_multilingual.py +24 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi_mmlu_multilingual.py +24 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi_truthfulqa_multilingual.py +34 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/paloma.py +25 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/pawsx.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/persona.py +144 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/pile.py +31 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/polemo2.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/portuguese_bench.py +31 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/prompt.py +23 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/qa4mre.py +12 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/qasper.py +11 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/ru.py +19 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/ruler.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/score.py +20 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/scrolls.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/self_consistency.py +11 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/spanish_bench.py +38 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/storycloze.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/super_glue_t5_prompt.py +17 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/tinyBenchmarks.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/tmlu.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/tmmluplus.py +80 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/translation.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/truthfulqa.py +76 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/truthfulqa_multi.py +24 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/turkishmmlu.py +30 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/unitxt.py +23 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/unscramble.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/winogender.py +16 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/wmdp.py +12 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/wmt14.py +16 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/wmt16.py +22 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/wsc273.py +9 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xcopa.py +21 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xnli.py +28 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xnli_eu.py +12 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xquad.py +22 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xstorycloze.py +22 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xwinograd.py +15 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +478 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +140 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +125 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aclue.py +171 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench.py +207 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench_hard.py +185 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/advanced.py +130 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aexams.py +184 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrimgsm.py +98 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrimmlu.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrixnli.py +129 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrobench_cot.py +88 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrobench_mc.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ag.py +134 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/agieval.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ai2_arc.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/anagrams1.py +81 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/anagrams2.py +81 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/anli.py +140 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabculture.py +180 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic.py +98 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_exams.py +104 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_complete.py +168 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_light.py +168 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabicmmlu.py +167 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aradice.py +268 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc.py +133 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_gen.py +101 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_mc.py +106 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/argument.py +134 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +122 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/assin.py +103 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/babi.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench_gen.py +168 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench_mc.py +139 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bbh.py +133 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bbq.py +169 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/belebele.py +181 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/benchmarks.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bertaqa.py +165 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhs.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhtc.py +143 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bigbench.py +170 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp.py +171 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp_nl.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq_seq2seq.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/c4.py +150 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cabbq.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cabreu.py +127 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/careqa.py +169 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench_gen.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench_mc.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalanqa.py +171 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catcola.py +139 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval.py +223 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval_valid.py +163 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chain.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chartqa.py +238 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/claim.py +151 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/click.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cmmlu.py +166 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cnn.py +144 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cocoteros.py +148 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/code2text.py +161 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/code_x_glue.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/codexglue.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coedit.py +149 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cola.py +83 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense_qa.py +127 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +124 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copal_id.py +169 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +162 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqcat.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/crows_pairs.py +158 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/csatqa.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cycle.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cycle_letters.py +81 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darija_bench.py +221 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijahellaswag.py +174 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijammlu.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/dbpedia.py +157 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/discrim_eval.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/doc.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +129 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/egyhellaswag.py +125 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/egymmlu.py +180 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/epec.py +142 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench.py +194 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_ca.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_es.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/esbbq.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/escola.py +85 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ethics.py +135 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ethos.py +99 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_exams.py +225 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_proficiency.py +159 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_reading.py +159 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_trivia.py +159 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_llm.py +166 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_sp.py +109 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/fda.py +105 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/financial.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/flan.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/fld.py +143 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench.py +202 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench_mc.py +98 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench_perplexity.py +86 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galcola.py +109 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench_gen.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench_mc.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gaokao.py +141 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glianorex.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_mmlu.py +171 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_piqa.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glue.py +109 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpqa.py +161 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpt3.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/groundcocoa.py +184 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm.py +108 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +134 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/haerae.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +125 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_ethics.py +225 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_math.py +191 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/histoires_morales.py +179 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hle.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hrm8k.py +203 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval.py +124 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval_infilling.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/icelandic_winogrande.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ifeval.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse_scaling.py +192 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/iwslt2017.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ja.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_gen.py +224 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_mc.py +120 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/jsonschema_bench.py +123 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kbl.py +140 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu.py +168 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu_cot.py +88 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu_mc.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kobest.py +165 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kormedmcqa.py +160 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada.py +147 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_cloze.py +185 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual.py +185 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual_stablelm.py +141 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/law.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/leaderboard.py +194 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/libra.py +165 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lingoly.py +203 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livemathbench.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/llama3.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lm_syneval.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logieval.py +82 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbench.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbenchv2.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mastermind.py +203 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mathqa.py +137 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mbpp.py +123 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/med_concepts_qa.py +224 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meddialog.py +180 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medical.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mediqa_qa2019.py +123 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medmcqa.py +169 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medtext.py +108 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mela.py +96 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meqsum.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/metabench.py +154 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mgsm.py +122 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mimic_repsum.py +140 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/minerva_math.py +172 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mlqa.py +143 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu.py +144 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu_cot.py +88 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu_mc.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu_pro.py +145 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlusr.py +189 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmmu.py +150 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mnli.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/model_written_evals.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/moral_stories.py +151 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mts_dialog.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mts_dialog_perplexity.py +97 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multiblimp.py +134 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multilingual.py +106 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/non.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval.py +173 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_exact.py +157 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_gen.py +277 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_gen_exact.py +165 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc.py +228 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc_log_likelihoods.py +223 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noticia.py +105 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/nq_open.py +135 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi.py +27 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_arc_multilingual.py +167 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_hellaswag_multilingual.py +174 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_mmlu_multilingual.py +162 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_truthfulqa_multilingual.py +209 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/olaph.py +186 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/olaph_perplexity.py +97 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/option.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paloma.py +205 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafraseja.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafrases.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws_x.py +154 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/persona.py +246 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/phrases.py +144 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/phrases_ca_va.py +82 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pile.py +161 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pile_10k.py +140 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +116 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/polemo2.py +135 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/polymath.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench_gen.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench_mc.py +103 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prompt.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper_bool.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnlieu.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/quac.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +124 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/random.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/realtoxicityprompts.py +124 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +125 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/reversed.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ruler.py +170 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/score.py +177 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls.py +161 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls_mc.py +157 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/self.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue.py +131 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue_rte.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/simple_cooccurrence_bias.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/siqa.py +209 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench_gen.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench_mc.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad2.py +129 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad_completion.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sst2.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/storycloze.py +250 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/summarization.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super.py +107 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super_glue.py +154 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/superglue.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/supergpqa.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swde.py +179 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sycophancy.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/t0.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/teca.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyarc.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinybenchmarks.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinygsm8k.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyhellaswag.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinymmlu.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinytruthfulqa.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinywinogrande.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tmmluplus.py +181 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/toxigen.py +91 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/translation.py +149 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +130 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +120 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +140 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_multi.py +142 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turblimp_core.py +152 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu.py +161 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu_cot.py +104 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu_mc.py +102 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/twenty_newsgroups.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/unitxt.py +131 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/unscramble.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/vaxx.py +95 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +130 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +122 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wikitext.py +146 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogender.py +139 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmdp.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmt14.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmt16.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc273.py +180 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xcopa.py +197 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xlsum.py +147 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +131 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xquad.py +203 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +129 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +124 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/yahoo.py +108 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/zhoblimp.py +155 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +56 -0
- wisent/core/data_loaders/__init__.py +235 -0
- wisent/core/data_loaders/core/__init__.py +0 -0
- wisent/core/data_loaders/core/atoms.py +99 -0
- wisent/core/data_loaders/loaders/__init__.py +0 -0
- wisent/core/data_loaders/loaders/custom.py +120 -0
- wisent/core/data_loaders/loaders/huggingface_loader.py +153 -0
- wisent/core/data_loaders/loaders/lm_loader.py +494 -0
- wisent/core/data_loaders/loaders/lm_loader_special_cases.py +496 -0
- wisent/core/data_loaders/loaders/task_interface_loader.py +300 -0
- wisent/core/data_loaders/rotator.py +118 -0
- wisent/core/detection_handling.py +259 -0
- wisent/core/diversity_processors.py +193 -0
- wisent/core/download_full_benchmarks.py +1512 -0
- wisent/core/errors/__init__.py +203 -0
- wisent/core/errors/error_codes.py +763 -0
- wisent/core/errors/error_handler.py +134 -0
- wisent/core/evaluators/__init__.py +0 -0
- wisent/core/evaluators/benchmark_specific/__init__.py +42 -0
- wisent/core/evaluators/benchmark_specific/aime_evaluator.py +90 -0
- wisent/core/evaluators/benchmark_specific/coding/__init__.py +0 -0
- wisent/core/evaluators/benchmark_specific/coding/metrics/__init__.py +0 -0
- wisent/core/evaluators/benchmark_specific/coding/metrics/core/__init__.py +0 -0
- wisent/core/evaluators/benchmark_specific/coding/metrics/core/atoms.py +36 -0
- wisent/core/evaluators/benchmark_specific/coding/metrics/evaluator.py +363 -0
- wisent/core/evaluators/benchmark_specific/coding/metrics/passk.py +67 -0
- wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/__init__.py +0 -0
- wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/core/__init__.py +0 -0
- wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/core/atoms.py +27 -0
- wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/cpp_sanitizer.py +62 -0
- wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/java_sanitizer.py +78 -0
- wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/python_sanitizer.py +94 -0
- wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/utils.py +126 -0
- wisent/core/evaluators/benchmark_specific/coding/providers/__init__.py +18 -0
- wisent/core/evaluators/benchmark_specific/coding/providers/core/__init__.py +0 -0
- wisent/core/evaluators/benchmark_specific/coding/providers/core/atoms.py +31 -0
- wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/__init__.py +3 -0
- wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/provider.py +305 -0
- wisent/core/evaluators/benchmark_specific/coding/safe_docker/Dockerfile +31 -0
- wisent/core/evaluators/benchmark_specific/coding/safe_docker/__init__.py +0 -0
- wisent/core/evaluators/benchmark_specific/coding/safe_docker/core/__init__.py +0 -0
- wisent/core/evaluators/benchmark_specific/coding/safe_docker/core/atoms.py +105 -0
- wisent/core/evaluators/benchmark_specific/coding/safe_docker/core/runtime.py +143 -0
- wisent/core/evaluators/benchmark_specific/coding/safe_docker/entrypoint.py +121 -0
- wisent/core/evaluators/benchmark_specific/coding/safe_docker/recipes.py +60 -0
- wisent/core/evaluators/benchmark_specific/coding/solution_generator.py +258 -0
- wisent/core/evaluators/benchmark_specific/conala_evaluator.py +332 -0
- wisent/core/evaluators/benchmark_specific/exact_match_evaluator.py +81 -0
- wisent/core/evaluators/benchmark_specific/f1_evaluator.py +173 -0
- wisent/core/evaluators/benchmark_specific/generation_evaluator.py +488 -0
- wisent/core/evaluators/benchmark_specific/livemathbench_evaluator.py +393 -0
- wisent/core/evaluators/benchmark_specific/log_likelihoods_evaluator.py +202 -0
- wisent/core/evaluators/benchmark_specific/math_evaluator.py +119 -0
- wisent/core/evaluators/benchmark_specific/math_parsing/__init__.py +1 -0
- wisent/core/evaluators/benchmark_specific/math_parsing/core.py +1640 -0
- wisent/core/evaluators/benchmark_specific/math_parsing/extract_boxed.py +48 -0
- wisent/core/evaluators/benchmark_specific/math_parsing/is_equiv.py +159 -0
- wisent/core/evaluators/benchmark_specific/math_parsing/scripts.py +919 -0
- wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py +175 -0
- wisent/core/evaluators/benchmark_specific/polymath_evaluator.py +114 -0
- wisent/core/evaluators/core/__init__.py +5 -0
- wisent/core/evaluators/core/atoms.py +166 -0
- wisent/core/evaluators/custom/__init__.py +20 -0
- wisent/core/evaluators/custom/custom_evaluator.py +382 -0
- wisent/core/evaluators/custom/examples/__init__.py +37 -0
- wisent/core/evaluators/custom/examples/desklib_detector.py +166 -0
- wisent/core/evaluators/custom/examples/gptzero.py +185 -0
- wisent/core/evaluators/custom/examples/humanization.py +79 -0
- wisent/core/evaluators/custom/examples/humanization_coherent.py +127 -0
- wisent/core/evaluators/custom/examples/roberta_detector.py +173 -0
- wisent/core/evaluators/oracles/__init__.py +0 -0
- wisent/core/evaluators/oracles/interactive.py +73 -0
- wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
- wisent/core/evaluators/oracles/truthfulqa_gen_evaluator.py +168 -0
- wisent/core/evaluators/oracles/user_specified.py +67 -0
- wisent/core/evaluators/personalization/__init__.py +12 -0
- wisent/core/evaluators/personalization/alignment.py +166 -0
- wisent/core/evaluators/personalization/coherence.py +325 -0
- wisent/core/evaluators/personalization/difference.py +73 -0
- wisent/core/evaluators/rotator.py +217 -0
- wisent/core/evaluators/steering_evaluators.py +386 -0
- wisent/core/evaluators/synthetic_evaluator.py +377 -0
- wisent/core/hyperparameter_optimizer.py +547 -0
- wisent/core/layer.py +17 -0
- wisent/core/lm_eval_harness_ground_truth.py +1431 -0
- wisent/core/main.py +101 -0
- wisent/core/managed_cached_benchmarks.py +609 -0
- wisent/core/mixed_benchmark_sampler.py +366 -0
- wisent/core/modalities/__init__.py +545 -0
- wisent/core/model_persistence.py +302 -0
- wisent/core/models/__init__.py +23 -0
- wisent/core/models/core/__init__.py +0 -0
- wisent/core/models/core/atoms.py +465 -0
- wisent/core/models/inference_config.py +127 -0
- wisent/core/models/wisent_model.py +893 -0
- wisent/core/multi_steering.py +397 -0
- wisent/core/opti/__init__.py +0 -0
- wisent/core/opti/core/__init__.py +0 -0
- wisent/core/opti/core/atoms.py +177 -0
- wisent/core/opti/methods/__init__.py +10 -0
- wisent/core/opti/methods/opti_classificator.py +172 -0
- wisent/core/opti/methods/opti_steering.py +139 -0
- wisent/core/opti/methods/opti_weights.py +523 -0
- wisent/core/optuna/__init__.py +54 -0
- wisent/core/optuna/classifier/__init__.py +25 -0
- wisent/core/optuna/classifier/activation_generator.py +351 -0
- wisent/core/optuna/classifier/classifier_cache.py +509 -0
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +685 -0
- wisent/core/optuna/steering/__init__.py +20 -0
- wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +200 -0
- wisent/core/optuna/steering/data_utils.py +342 -0
- wisent/core/optuna/steering/metrics.py +412 -0
- wisent/core/optuna/steering/steering_optimization.py +1096 -0
- wisent/core/parser.py +1662 -0
- wisent/core/parser_arguments/__init__.py +10 -0
- wisent/core/parser_arguments/agent_parser.py +122 -0
- wisent/core/parser_arguments/check_linearity_parser.py +82 -0
- wisent/core/parser_arguments/configure_model_parser.py +7 -0
- wisent/core/parser_arguments/create_steering_vector_parser.py +67 -0
- wisent/core/parser_arguments/diagnose_pairs_parser.py +25 -0
- wisent/core/parser_arguments/diagnose_vectors_parser.py +72 -0
- wisent/core/parser_arguments/evaluate_parser.py +40 -0
- wisent/core/parser_arguments/evaluate_refusal_parser.py +32 -0
- wisent/core/parser_arguments/evaluate_responses_parser.py +12 -0
- wisent/core/parser_arguments/full_optimize_parser.py +194 -0
- wisent/core/parser_arguments/generate_pairs_from_task_parser.py +33 -0
- wisent/core/parser_arguments/generate_pairs_parser.py +43 -0
- wisent/core/parser_arguments/generate_responses_parser.py +16 -0
- wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +148 -0
- wisent/core/parser_arguments/generate_vector_from_task_parser.py +149 -0
- wisent/core/parser_arguments/generate_vector_parser.py +89 -0
- wisent/core/parser_arguments/get_activations_parser.py +90 -0
- wisent/core/parser_arguments/inference_config_parser.py +65 -0
- wisent/core/parser_arguments/main_parser.py +220 -0
- wisent/core/parser_arguments/model_config_parser.py +59 -0
- wisent/core/parser_arguments/modify_weights_parser.py +309 -0
- wisent/core/parser_arguments/monitor_parser.py +17 -0
- wisent/core/parser_arguments/multi_steer_parser.py +48 -0
- wisent/core/parser_arguments/nonsense_parser.py +26 -0
- wisent/core/parser_arguments/optimization_cache_parser.py +64 -0
- wisent/core/parser_arguments/optimize_classification_parser.py +108 -0
- wisent/core/parser_arguments/optimize_parser.py +142 -0
- wisent/core/parser_arguments/optimize_sample_size_parser.py +58 -0
- wisent/core/parser_arguments/optimize_steering_parser.py +617 -0
- wisent/core/parser_arguments/optimize_weights_parser.py +403 -0
- wisent/core/parser_arguments/synthetic_parser.py +117 -0
- wisent/core/parser_arguments/tasks_parser.py +591 -0
- wisent/core/parser_arguments/train_unified_goodness_parser.py +172 -0
- wisent/core/parser_arguments/utils.py +107 -0
- wisent/core/prompts/__init__.py +0 -0
- wisent/core/prompts/core/__init__.py +0 -0
- wisent/core/prompts/core/atom.py +57 -0
- wisent/core/prompts/core/prompt_formater.py +148 -0
- wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
- wisent/core/prompts/prompt_stratiegies/direct_completion.py +26 -0
- wisent/core/prompts/prompt_stratiegies/instruction_following.py +26 -0
- wisent/core/prompts/prompt_stratiegies/multiple_choice.py +31 -0
- wisent/core/prompts/prompt_stratiegies/role_playing.py +33 -0
- wisent/core/representation.py +5 -0
- wisent/core/save_results.py +277 -0
- wisent/core/steering.py +660 -0
- wisent/core/steering_method.py +20 -0
- wisent/core/steering_methods/__init__.py +54 -0
- wisent/core/steering_methods/core/__init__.py +0 -0
- wisent/core/steering_methods/core/atoms.py +154 -0
- wisent/core/steering_methods/methods/__init__.py +0 -0
- wisent/core/steering_methods/methods/caa.py +45 -0
- wisent/core/steering_methods/methods/prism.py +588 -0
- wisent/core/steering_methods/methods/pulse.py +641 -0
- wisent/core/steering_methods/methods/titan.py +1005 -0
- wisent/core/steering_methods/preflight.py +322 -0
- wisent/core/steering_methods/registry.py +649 -0
- wisent/core/steering_methods/rotator.py +121 -0
- wisent/core/steering_optimizer.py +1503 -0
- wisent/core/synthetic/__init__.py +0 -0
- wisent/core/synthetic/cleaners/__init__.py +0 -0
- wisent/core/synthetic/cleaners/core/__init__.py +0 -0
- wisent/core/synthetic/cleaners/core/atoms.py +58 -0
- wisent/core/synthetic/cleaners/deduper_cleaner.py +53 -0
- wisent/core/synthetic/cleaners/methods/__init__.py +0 -0
- wisent/core/synthetic/cleaners/methods/base_dedupers.py +321 -0
- wisent/core/synthetic/cleaners/methods/base_refusalers.py +286 -0
- wisent/core/synthetic/cleaners/methods/core/__init__.py +0 -0
- wisent/core/synthetic/cleaners/methods/core/atoms.py +47 -0
- wisent/core/synthetic/cleaners/pairs_cleaner.py +90 -0
- wisent/core/synthetic/cleaners/refusaler_cleaner.py +133 -0
- wisent/core/synthetic/db_instructions/__init__.py +0 -0
- wisent/core/synthetic/db_instructions/core/__init__.py +0 -0
- wisent/core/synthetic/db_instructions/core/atoms.py +25 -0
- wisent/core/synthetic/db_instructions/mini_dp.py +115 -0
- wisent/core/synthetic/generators/__init__.py +0 -0
- wisent/core/synthetic/generators/core/__init__.py +0 -0
- wisent/core/synthetic/generators/core/atoms.py +73 -0
- wisent/core/synthetic/generators/diversities/__init__.py +0 -0
- wisent/core/synthetic/generators/diversities/core/__init__.py +0 -0
- wisent/core/synthetic/generators/diversities/core/core.py +68 -0
- wisent/core/synthetic/generators/diversities/methods/__init__.py +0 -0
- wisent/core/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
- wisent/core/synthetic/generators/nonsense_generator.py +150 -0
- wisent/core/synthetic/generators/pairs_generator.py +313 -0
- wisent/core/task_interface.py +143 -0
- wisent/core/task_selector.py +232 -0
- wisent/core/tasks/__init__.py +218 -0
- wisent/core/tasks/aime_task.py +142 -0
- wisent/core/tasks/file_task.py +212 -0
- wisent/core/tasks/hle_task.py +180 -0
- wisent/core/tasks/hmmt_task.py +120 -0
- wisent/core/tasks/livecodebench_task.py +94 -0
- wisent/core/tasks/livemathbench_task.py +159 -0
- wisent/core/tasks/lm_eval_task.py +611 -0
- wisent/core/tasks/math500_task.py +84 -0
- wisent/core/tasks/polymath_task.py +147 -0
- wisent/core/tasks/supergpqa_task.py +220 -0
- wisent/core/time_estimator.py +155 -0
- wisent/core/timing_calibration.py +176 -0
- wisent/core/tracking/__init__.py +54 -0
- wisent/core/tracking/latency.py +620 -0
- wisent/core/tracking/memory.py +360 -0
- wisent/core/trainers/__init__.py +0 -0
- wisent/core/trainers/core/__init__.py +11 -0
- wisent/core/trainers/core/atoms.py +45 -0
- wisent/core/trainers/steering_trainer.py +365 -0
- wisent/core/universal_subspace.py +918 -0
- wisent/core/user_model_config.py +158 -0
- wisent/core/utils/__init__.py +64 -0
- wisent/core/utils/base_rotator.py +292 -0
- wisent/core/utils/dataset_splits.py +197 -0
- wisent/core/utils/device.py +279 -0
- wisent/core/weight_modification/__init__.py +134 -0
- wisent/core/weight_modification/additive.py +340 -0
- wisent/core/weight_modification/directional.py +1357 -0
- wisent/core/weight_modification/export.py +359 -0
- wisent/core/weight_modification/multi_direction.py +410 -0
- wisent/core/weight_modification/utils.py +236 -0
- wisent/core/wisent.py +660 -0
- wisent/examples/contrastive_pairs/humanization_human_vs_ai.json +2112 -0
- wisent/examples/scripts/1/test_basqueglue_evaluation.json +51 -0
- wisent/examples/scripts/1/test_basqueglue_pairs.json +14 -0
- wisent/examples/scripts/1/test_bec2016eu_evaluation.json +51 -0
- wisent/examples/scripts/1/test_bec2016eu_pairs.json +14 -0
- wisent/examples/scripts/1/test_belebele_evaluation.json +51 -0
- wisent/examples/scripts/1/test_belebele_pairs.json +14 -0
- wisent/examples/scripts/1/test_benchmarks_evaluation.json +51 -0
- wisent/examples/scripts/1/test_benchmarks_pairs.json +14 -0
- wisent/examples/scripts/1/test_bertaqa_evaluation.json +51 -0
- wisent/examples/scripts/1/test_bertaqa_pairs.json +14 -0
- wisent/examples/scripts/1/test_bhtc_v2_evaluation.json +30 -0
- wisent/examples/scripts/1/test_bhtc_v2_pairs.json +8 -0
- wisent/examples/scripts/1/test_boolq-seq2seq_evaluation.json +30 -0
- wisent/examples/scripts/1/test_boolq-seq2seq_pairs.json +8 -0
- wisent/examples/scripts/1/test_cabreu_evaluation.json +30 -0
- wisent/examples/scripts/1/test_cabreu_pairs.json +8 -0
- wisent/examples/scripts/1/test_careqa_en_evaluation.json +30 -0
- wisent/examples/scripts/1/test_careqa_en_pairs.json +8 -0
- wisent/examples/scripts/1/test_careqa_evaluation.json +30 -0
- wisent/examples/scripts/1/test_careqa_pairs.json +8 -0
- wisent/examples/scripts/1/test_catalanqa_evaluation.json +30 -0
- wisent/examples/scripts/1/test_catalanqa_pairs.json +8 -0
- wisent/examples/scripts/1/test_catcola_evaluation.json +30 -0
- wisent/examples/scripts/1/test_catcola_pairs.json +8 -0
- wisent/examples/scripts/1/test_chartqa_evaluation.json +30 -0
- wisent/examples/scripts/1/test_chartqa_pairs.json +8 -0
- wisent/examples/scripts/1/test_claim_stance_topic_evaluation.json +30 -0
- wisent/examples/scripts/1/test_claim_stance_topic_pairs.json +8 -0
- wisent/examples/scripts/1/test_cnn_dailymail_evaluation.json +30 -0
- wisent/examples/scripts/1/test_cnn_dailymail_pairs.json +8 -0
- wisent/examples/scripts/1/test_cocoteros_es_evaluation.json +30 -0
- wisent/examples/scripts/1/test_cocoteros_es_pairs.json +8 -0
- wisent/examples/scripts/1/test_coedit_gec_evaluation.json +30 -0
- wisent/examples/scripts/1/test_coedit_gec_pairs.json +8 -0
- wisent/examples/scripts/1/test_cola_evaluation.json +30 -0
- wisent/examples/scripts/1/test_cola_pairs.json +8 -0
- wisent/examples/scripts/1/test_coqcat_evaluation.json +30 -0
- wisent/examples/scripts/1/test_coqcat_pairs.json +8 -0
- wisent/examples/scripts/1/test_dbpedia_14_evaluation.json +30 -0
- wisent/examples/scripts/1/test_dbpedia_14_pairs.json +8 -0
- wisent/examples/scripts/1/test_epec_koref_bin_evaluation.json +30 -0
- wisent/examples/scripts/1/test_epec_koref_bin_pairs.json +8 -0
- wisent/examples/scripts/1/test_ethos_binary_evaluation.json +30 -0
- wisent/examples/scripts/1/test_ethos_binary_pairs.json +8 -0
- wisent/examples/scripts/2/test_afrimgsm_direct_amh_evaluation.json +30 -0
- wisent/examples/scripts/2/test_afrimgsm_direct_amh_pairs.json +8 -0
- wisent/examples/scripts/2/test_afrimmlu_direct_amh_evaluation.json +30 -0
- wisent/examples/scripts/2/test_afrimmlu_direct_amh_pairs.json +8 -0
- wisent/examples/scripts/2/test_afrixnli_en_direct_amh_evaluation.json +30 -0
- wisent/examples/scripts/2/test_afrixnli_en_direct_amh_pairs.json +8 -0
- wisent/examples/scripts/2/test_arc_ar_evaluation.json +30 -0
- wisent/examples/scripts/2/test_arc_ar_pairs.json +8 -0
- wisent/examples/scripts/2/test_atis_evaluation.json +30 -0
- wisent/examples/scripts/2/test_atis_pairs.json +8 -0
- wisent/examples/scripts/2/test_babi_evaluation.json +30 -0
- wisent/examples/scripts/2/test_babi_pairs.json +8 -0
- wisent/examples/scripts/2/test_babilong_evaluation.json +30 -0
- wisent/examples/scripts/2/test_babilong_pairs.json +8 -0
- wisent/examples/scripts/2/test_bangla_mmlu_evaluation.json +30 -0
- wisent/examples/scripts/2/test_bangla_mmlu_pairs.json +8 -0
- wisent/examples/scripts/2/test_basque-glue_pairs.json +14 -0
- wisent/examples/scripts/benchmark_tags.json +2140 -0
- wisent/examples/scripts/lm_eval_readme.json +4 -0
- wisent/examples/scripts/results/benchmark_descriptions.json +1244 -0
- wisent/examples/scripts/results/benchmark_evaluation_methods.json +66 -0
- wisent/examples/scripts/results/benchmark_evaluator_mapping.json +2781 -0
- wisent/examples/scripts/results/benchmark_evaluator_mapping_updated.json +30536 -0
- wisent/examples/scripts/results/benchmark_evaluators_clean.json +469 -0
- wisent/examples/scripts/results/benchmark_methods_summary.json +260 -0
- wisent/examples/scripts/results/benchmark_pair_creation_methods.json +66 -0
- wisent/examples/scripts/results/benchmark_pair_totals.json +269 -0
- wisent/examples/scripts/results/benchmark_tags.json +917 -0
- wisent/examples/scripts/results/benchmark_test_summary_nov4.json +71 -0
- wisent/examples/scripts/results/coding_benchmarks_test_code_status.json +150 -0
- wisent/examples/scripts/results/failing_benchmarks.json +946 -0
- wisent/examples/scripts/results/failing_benchmarks_list.json +41 -0
- wisent/examples/scripts/results/failing_benchmarks_test_results.json +945 -0
- wisent/examples/scripts/results/missing_benchmark_tags.json +341 -0
- wisent/examples/scripts/results/test_20_newsgroups_evaluation.json +30 -0
- wisent/examples/scripts/results/test_20_newsgroups_pairs.json +8 -0
- wisent/examples/scripts/results/test_AraDICE_evaluation.json +51 -0
- wisent/examples/scripts/results/test_AraDICE_pairs.json +14 -0
- wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_evaluation.json +30 -0
- wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_pairs.json +8 -0
- wisent/examples/scripts/results/test_ArabCulture_evaluation.json +51 -0
- wisent/examples/scripts/results/test_ArabCulture_pairs.json +14 -0
- wisent/examples/scripts/results/test_Tag_evaluation.json +30 -0
- wisent/examples/scripts/results/test_Tag_pairs.json +8 -0
- wisent/examples/scripts/results/test_aclue_evaluation.json +51 -0
- wisent/examples/scripts/results/test_aclue_pairs.json +14 -0
- wisent/examples/scripts/results/test_acp_bench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_acp_bench_hard_evaluation.json +51 -0
- wisent/examples/scripts/results/test_acp_bench_hard_pairs.json +14 -0
- wisent/examples/scripts/results/test_acp_bench_pairs.json +14 -0
- wisent/examples/scripts/results/test_advanced_ai_risk_evaluation.json +51 -0
- wisent/examples/scripts/results/test_advanced_ai_risk_pairs.json +14 -0
- wisent/examples/scripts/results/test_aexams_evaluation.json +51 -0
- wisent/examples/scripts/results/test_aexams_pairs.json +14 -0
- wisent/examples/scripts/results/test_afrimgsm_direct_amh_evaluation.json +30 -0
- wisent/examples/scripts/results/test_afrimgsm_direct_amh_pairs.json +8 -0
- wisent/examples/scripts/results/test_afrimmlu_direct_amh_evaluation.json +30 -0
- wisent/examples/scripts/results/test_afrimmlu_direct_amh_pairs.json +8 -0
- wisent/examples/scripts/results/test_afrixnli_en_direct_amh_evaluation.json +30 -0
- wisent/examples/scripts/results/test_afrixnli_en_direct_amh_pairs.json +8 -0
- wisent/examples/scripts/results/test_ag_news_evaluation.json +30 -0
- wisent/examples/scripts/results/test_ag_news_pairs.json +8 -0
- wisent/examples/scripts/results/test_agieval_evaluation.json +51 -0
- wisent/examples/scripts/results/test_agieval_pairs.json +14 -0
- wisent/examples/scripts/results/test_aime2024_evaluation.json +30 -0
- wisent/examples/scripts/results/test_aime2024_pairs.json +8 -0
- wisent/examples/scripts/results/test_aime2025_evaluation.json +30 -0
- wisent/examples/scripts/results/test_aime2025_pairs.json +8 -0
- wisent/examples/scripts/results/test_aime_evaluation.json +30 -0
- wisent/examples/scripts/results/test_aime_pairs.json +8 -0
- wisent/examples/scripts/results/test_anagrams1_evaluation.json +30 -0
- wisent/examples/scripts/results/test_anagrams1_pairs.json +8 -0
- wisent/examples/scripts/results/test_anagrams2_evaluation.json +30 -0
- wisent/examples/scripts/results/test_anagrams2_pairs.json +8 -0
- wisent/examples/scripts/results/test_anli_evaluation.json +30 -0
- wisent/examples/scripts/results/test_anli_pairs.json +8 -0
- wisent/examples/scripts/results/test_apps_evaluation.json +30 -0
- wisent/examples/scripts/results/test_apps_pairs.json +8 -0
- wisent/examples/scripts/results/test_arabic_exams_evaluation.json +30 -0
- wisent/examples/scripts/results/test_arabic_exams_pairs.json +8 -0
- wisent/examples/scripts/results/test_arabic_leaderboard_complete_evaluation.json +51 -0
- wisent/examples/scripts/results/test_arabic_leaderboard_complete_pairs.json +14 -0
- wisent/examples/scripts/results/test_arabic_leaderboard_light_evaluation.json +51 -0
- wisent/examples/scripts/results/test_arabic_leaderboard_light_pairs.json +14 -0
- wisent/examples/scripts/results/test_arabicmmlu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_arabicmmlu_pairs.json +14 -0
- wisent/examples/scripts/results/test_aradice/test_aradice_evaluation.json +51 -0
- wisent/examples/scripts/results/test_aradice/test_aradice_pairs.json +14 -0
- wisent/examples/scripts/results/test_aradice3/test_aradice_evaluation.json +51 -0
- wisent/examples/scripts/results/test_aradice3/test_aradice_pairs.json +14 -0
- wisent/examples/scripts/results/test_arc_ar_evaluation.json +30 -0
- wisent/examples/scripts/results/test_arc_ar_pairs.json +8 -0
- wisent/examples/scripts/results/test_arc_challenge_evaluation.json +30 -0
- wisent/examples/scripts/results/test_arc_challenge_pairs.json +8 -0
- wisent/examples/scripts/results/test_arc_easy_evaluation.json +30 -0
- wisent/examples/scripts/results/test_arc_easy_pairs.json +8 -0
- wisent/examples/scripts/results/test_argument_topic_evaluation.json +30 -0
- wisent/examples/scripts/results/test_argument_topic_pairs.json +8 -0
- wisent/examples/scripts/results/test_arithmetic_evaluation.json +51 -0
- wisent/examples/scripts/results/test_arithmetic_pairs.json +14 -0
- wisent/examples/scripts/results/test_asdiv_evaluation.json +30 -0
- wisent/examples/scripts/results/test_asdiv_pairs.json +8 -0
- wisent/examples/scripts/results/test_assin_entailment_evaluation.json +30 -0
- wisent/examples/scripts/results/test_assin_entailment_pairs.json +8 -0
- wisent/examples/scripts/results/test_atis_evaluation.json +30 -0
- wisent/examples/scripts/results/test_atis_pairs.json +8 -0
- wisent/examples/scripts/results/test_babi_evaluation.json +30 -0
- wisent/examples/scripts/results/test_babi_pairs.json +8 -0
- wisent/examples/scripts/results/test_babilong_evaluation.json +30 -0
- wisent/examples/scripts/results/test_babilong_pairs.json +8 -0
- wisent/examples/scripts/results/test_bangla_mmlu_evaluation.json +30 -0
- wisent/examples/scripts/results/test_bangla_mmlu_pairs.json +8 -0
- wisent/examples/scripts/results/test_banking77_evaluation.json +30 -0
- wisent/examples/scripts/results/test_banking77_pairs.json +8 -0
- wisent/examples/scripts/results/test_basque/test_basque-glue_pairs.json +14 -0
- wisent/examples/scripts/results/test_basque-glue_evaluation.json +51 -0
- wisent/examples/scripts/results/test_basque-glue_pairs.json +14 -0
- wisent/examples/scripts/results/test_basque2/test_basque-glue_evaluation.json +51 -0
- wisent/examples/scripts/results/test_basque2/test_basque-glue_pairs.json +14 -0
- wisent/examples/scripts/results/test_basque_bench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_basque_bench_pairs.json +14 -0
- wisent/examples/scripts/results/test_basque_glue/test_basque-glue_evaluation.json +51 -0
- wisent/examples/scripts/results/test_basque_glue/test_basque-glue_pairs.json +14 -0
- wisent/examples/scripts/results/test_basqueglue_evaluation.json +51 -0
- wisent/examples/scripts/results/test_basqueglue_pairs.json +14 -0
- wisent/examples/scripts/results/test_bbh_evaluation.json +51 -0
- wisent/examples/scripts/results/test_bbh_pairs.json +14 -0
- wisent/examples/scripts/results/test_bbq_evaluation.json +30 -0
- wisent/examples/scripts/results/test_bbq_pairs.json +8 -0
- wisent/examples/scripts/results/test_bec2016eu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_bec2016eu_pairs.json +14 -0
- wisent/examples/scripts/results/test_belebele_evaluation.json +51 -0
- wisent/examples/scripts/results/test_belebele_pairs.json +14 -0
- wisent/examples/scripts/results/test_benchmarks_evaluation.json +51 -0
- wisent/examples/scripts/results/test_benchmarks_pairs.json +14 -0
- wisent/examples/scripts/results/test_bertaqa_evaluation.json +51 -0
- wisent/examples/scripts/results/test_bertaqa_pairs.json +14 -0
- wisent/examples/scripts/results/test_bhtc_v2_evaluation.json +30 -0
- wisent/examples/scripts/results/test_bhtc_v2_pairs.json +8 -0
- wisent/examples/scripts/results/test_bigbench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_bigbench_pairs.json +14 -0
- wisent/examples/scripts/results/test_blimp_evaluation.json +51 -0
- wisent/examples/scripts/results/test_blimp_pairs.json +14 -0
- wisent/examples/scripts/results/test_boolq/test_boolq_evaluation.json +30 -0
- wisent/examples/scripts/results/test_boolq/test_boolq_pairs.json +8 -0
- wisent/examples/scripts/results/test_boolq-seq2seq_evaluation.json +30 -0
- wisent/examples/scripts/results/test_boolq-seq2seq_pairs.json +8 -0
- wisent/examples/scripts/results/test_boolq_evaluation.json +30 -0
- wisent/examples/scripts/results/test_boolq_pairs.json +8 -0
- wisent/examples/scripts/results/test_c4_evaluation.json +30 -0
- wisent/examples/scripts/results/test_c4_pairs.json +8 -0
- wisent/examples/scripts/results/test_cabreu_evaluation.json +30 -0
- wisent/examples/scripts/results/test_cabreu_pairs.json +8 -0
- wisent/examples/scripts/results/test_careqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_careqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_catalan_bench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_catalan_bench_pairs.json +14 -0
- wisent/examples/scripts/results/test_catalanqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_catalanqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_catcola_evaluation.json +30 -0
- wisent/examples/scripts/results/test_catcola_pairs.json +8 -0
- wisent/examples/scripts/results/test_cb_evaluation.json +30 -0
- wisent/examples/scripts/results/test_cb_pairs.json +8 -0
- wisent/examples/scripts/results/test_ceval/test_ceval_evaluation.json +51 -0
- wisent/examples/scripts/results/test_ceval/test_ceval_pairs.json +14 -0
- wisent/examples/scripts/results/test_ceval_accountant/test_ceval-valid_accountant_evaluation.json +30 -0
- wisent/examples/scripts/results/test_ceval_accountant/test_ceval-valid_accountant_pairs.json +8 -0
- wisent/examples/scripts/results/test_ceval_evaluation.json +51 -0
- wisent/examples/scripts/results/test_ceval_pairs.json +14 -0
- wisent/examples/scripts/results/test_ceval_valid/test_ceval_valid_evaluation.json +51 -0
- wisent/examples/scripts/results/test_ceval_valid/test_ceval_valid_pairs.json +14 -0
- wisent/examples/scripts/results/test_chain_of_thought_evaluation.json +51 -0
- wisent/examples/scripts/results/test_chain_of_thought_pairs.json +14 -0
- wisent/examples/scripts/results/test_chartqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_chartqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_claim_stance_topic_evaluation.json +30 -0
- wisent/examples/scripts/results/test_claim_stance_topic_pairs.json +8 -0
- wisent/examples/scripts/results/test_cmmlu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_cmmlu_pairs.json +14 -0
- wisent/examples/scripts/results/test_cnn_dailymail_evaluation.json +30 -0
- wisent/examples/scripts/results/test_cnn_dailymail_pairs.json +8 -0
- wisent/examples/scripts/results/test_cocoteros_es_evaluation.json +30 -0
- wisent/examples/scripts/results/test_cocoteros_es_pairs.json +8 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_go_evaluation.json +30 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_go_pairs.json +8 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_java_evaluation.json +30 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_java_pairs.json +8 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_javascript_evaluation.json +30 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_javascript_pairs.json +8 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_php_evaluation.json +30 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_php_pairs.json +8 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_python_evaluation.json +30 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_python_pairs.json +8 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_ruby_evaluation.json +30 -0
- wisent/examples/scripts/results/test_codexglue_code_to_text_ruby_pairs.json +8 -0
- wisent/examples/scripts/results/test_coedit_gec_evaluation.json +30 -0
- wisent/examples/scripts/results/test_coedit_gec_pairs.json +8 -0
- wisent/examples/scripts/results/test_cola_evaluation.json +30 -0
- wisent/examples/scripts/results/test_cola_pairs.json +8 -0
- wisent/examples/scripts/results/test_commonsense_qa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_commonsense_qa_pairs.json +8 -0
- wisent/examples/scripts/results/test_conala_evaluation.json +30 -0
- wisent/examples/scripts/results/test_conala_pairs.json +8 -0
- wisent/examples/scripts/results/test_concode_evaluation.json +30 -0
- wisent/examples/scripts/results/test_concode_pairs.json +8 -0
- wisent/examples/scripts/results/test_copa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_copa_pairs.json +8 -0
- wisent/examples/scripts/results/test_copal_id_evaluation.json +30 -0
- wisent/examples/scripts/results/test_copal_id_pairs.json +8 -0
- wisent/examples/scripts/results/test_coqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_coqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_coqcat_evaluation.json +30 -0
- wisent/examples/scripts/results/test_coqcat_pairs.json +8 -0
- wisent/examples/scripts/results/test_crows_pairs_evaluation.json +51 -0
- wisent/examples/scripts/results/test_crows_pairs_pairs.json +14 -0
- wisent/examples/scripts/results/test_csatqa_evaluation.json +51 -0
- wisent/examples/scripts/results/test_csatqa_pairs.json +14 -0
- wisent/examples/scripts/results/test_cycle_letters_evaluation.json +30 -0
- wisent/examples/scripts/results/test_cycle_letters_pairs.json +8 -0
- wisent/examples/scripts/results/test_darija_bench/test_darija_bench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_darija_bench/test_darija_bench_pairs.json +14 -0
- wisent/examples/scripts/results/test_darija_bench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_darija_bench_pairs.json +14 -0
- wisent/examples/scripts/results/test_darijahellaswag_evaluation.json +30 -0
- wisent/examples/scripts/results/test_darijahellaswag_pairs.json +8 -0
- wisent/examples/scripts/results/test_darijammlu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_darijammlu_pairs.json +14 -0
- wisent/examples/scripts/results/test_dbpedia_14_evaluation.json +30 -0
- wisent/examples/scripts/results/test_dbpedia_14_pairs.json +8 -0
- wisent/examples/scripts/results/test_drop_evaluation.json +30 -0
- wisent/examples/scripts/results/test_drop_pairs.json +8 -0
- wisent/examples/scripts/results/test_ds1000_evaluation.json +30 -0
- wisent/examples/scripts/results/test_ds1000_pairs.json +8 -0
- wisent/examples/scripts/results/test_egyhellaswag_evaluation.json +30 -0
- wisent/examples/scripts/results/test_egyhellaswag_pairs.json +8 -0
- wisent/examples/scripts/results/test_egymmlu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_egymmlu_pairs.json +14 -0
- wisent/examples/scripts/results/test_epec_koref_bin_evaluation.json +30 -0
- wisent/examples/scripts/results/test_epec_koref_bin_pairs.json +8 -0
- wisent/examples/scripts/results/test_eq_bench_evaluation.json +30 -0
- wisent/examples/scripts/results/test_eq_bench_pairs.json +8 -0
- wisent/examples/scripts/results/test_escola_evaluation.json +30 -0
- wisent/examples/scripts/results/test_escola_pairs.json +8 -0
- wisent/examples/scripts/results/test_ethics_cm_evaluation.json +30 -0
- wisent/examples/scripts/results/test_ethics_cm_pairs.json +8 -0
- wisent/examples/scripts/results/test_ethos_binary_evaluation.json +30 -0
- wisent/examples/scripts/results/test_ethos_binary_pairs.json +8 -0
- wisent/examples/scripts/results/test_eus_exams/test_eus_exams_evaluation.json +51 -0
- wisent/examples/scripts/results/test_eus_exams/test_eus_exams_pairs.json +14 -0
- wisent/examples/scripts/results/test_eus_exams_es_evaluation.json +51 -0
- wisent/examples/scripts/results/test_eus_exams_es_pairs.json +14 -0
- wisent/examples/scripts/results/test_eus_exams_evaluation.json +51 -0
- wisent/examples/scripts/results/test_eus_exams_pairs.json +14 -0
- wisent/examples/scripts/results/test_eus_proficiency_evaluation.json +30 -0
- wisent/examples/scripts/results/test_eus_proficiency_pairs.json +8 -0
- wisent/examples/scripts/results/test_eus_reading_evaluation.json +30 -0
- wisent/examples/scripts/results/test_eus_reading_pairs.json +8 -0
- wisent/examples/scripts/results/test_eus_trivia_evaluation.json +30 -0
- wisent/examples/scripts/results/test_eus_trivia_pairs.json +8 -0
- wisent/examples/scripts/results/test_evalita-mp_evaluation.json +51 -0
- wisent/examples/scripts/results/test_evalita-mp_pairs.json +14 -0
- wisent/examples/scripts/results/test_evalita-sp_sum_task_fp-small_p1_evaluation.json +30 -0
- wisent/examples/scripts/results/test_evalita-sp_sum_task_fp-small_p1_pairs.json +8 -0
- wisent/examples/scripts/results/test_evalita_LLM_evaluation.json +51 -0
- wisent/examples/scripts/results/test_evalita_LLM_pairs.json +14 -0
- wisent/examples/scripts/results/test_evalita_llm/test_evalita_llm_evaluation.json +51 -0
- wisent/examples/scripts/results/test_evalita_llm/test_evalita_llm_pairs.json +14 -0
- wisent/examples/scripts/results/test_evalita_mp/test_evalita-mp_te_prompt-1_evaluation.json +30 -0
- wisent/examples/scripts/results/test_evalita_mp/test_evalita-mp_te_prompt-1_pairs.json +8 -0
- wisent/examples/scripts/results/test_evalita_mp2/test_evalita_mp_evaluation.json +51 -0
- wisent/examples/scripts/results/test_evalita_mp2/test_evalita_mp_pairs.json +14 -0
- wisent/examples/scripts/results/test_evalita_sp2/test_evalita-sp_sum_task_fp-small_p1_evaluation.json +30 -0
- wisent/examples/scripts/results/test_evalita_sp2/test_evalita-sp_sum_task_fp-small_p1_pairs.json +8 -0
- wisent/examples/scripts/results/test_fda_evaluation.json +30 -0
- wisent/examples/scripts/results/test_fda_pairs.json +8 -0
- wisent/examples/scripts/results/test_financial_tweets_evaluation.json +30 -0
- wisent/examples/scripts/results/test_financial_tweets_pairs.json +8 -0
- wisent/examples/scripts/results/test_fld/test_fld_evaluation.json +30 -0
- wisent/examples/scripts/results/test_fld/test_fld_pairs.json +8 -0
- wisent/examples/scripts/results/test_fld_evaluation.json +30 -0
- wisent/examples/scripts/results/test_fld_fixed/test_fld_evaluation.json +30 -0
- wisent/examples/scripts/results/test_fld_fixed/test_fld_pairs.json +8 -0
- wisent/examples/scripts/results/test_fld_pairs.json +8 -0
- wisent/examples/scripts/results/test_flores_evaluation.json +51 -0
- wisent/examples/scripts/results/test_flores_pairs.json +14 -0
- wisent/examples/scripts/results/test_freebase_evaluation.json +30 -0
- wisent/examples/scripts/results/test_freebase_pairs.json +8 -0
- wisent/examples/scripts/results/test_french_bench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_french_bench_pairs.json +14 -0
- wisent/examples/scripts/results/test_galcola_evaluation.json +30 -0
- wisent/examples/scripts/results/test_galcola_pairs.json +8 -0
- wisent/examples/scripts/results/test_galician_bench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_galician_bench_pairs.json +14 -0
- wisent/examples/scripts/results/test_glianorex_evaluation.json +30 -0
- wisent/examples/scripts/results/test_glianorex_pairs.json +8 -0
- wisent/examples/scripts/results/test_global_mmlu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_global_mmlu_pairs.json +14 -0
- wisent/examples/scripts/results/test_glue_evaluation.json +51 -0
- wisent/examples/scripts/results/test_glue_pairs.json +14 -0
- wisent/examples/scripts/results/test_gpqa_evaluation.json +51 -0
- wisent/examples/scripts/results/test_gpqa_pairs.json +14 -0
- wisent/examples/scripts/results/test_gpt3_translation_benchmarks_evaluation.json +51 -0
- wisent/examples/scripts/results/test_gpt3_translation_benchmarks_pairs.json +14 -0
- wisent/examples/scripts/results/test_groundcocoa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_groundcocoa_pairs.json +8 -0
- wisent/examples/scripts/results/test_gsm8k_evaluation.json +30 -0
- wisent/examples/scripts/results/test_gsm8k_pairs.json +8 -0
- wisent/examples/scripts/results/test_haerae_evaluation.json +51 -0
- wisent/examples/scripts/results/test_haerae_pairs.json +14 -0
- wisent/examples/scripts/results/test_headqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_headqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_hellaswag_evaluation.json +30 -0
- wisent/examples/scripts/results/test_hellaswag_pairs.json +8 -0
- wisent/examples/scripts/results/test_hendrycks_ethics_evaluation.json +51 -0
- wisent/examples/scripts/results/test_hendrycks_ethics_pairs.json +14 -0
- wisent/examples/scripts/results/test_hendrycks_math_evaluation.json +51 -0
- wisent/examples/scripts/results/test_hendrycks_math_pairs.json +14 -0
- wisent/examples/scripts/results/test_histoires_morales_evaluation.json +30 -0
- wisent/examples/scripts/results/test_histoires_morales_pairs.json +8 -0
- wisent/examples/scripts/results/test_hmmt_evaluation.json +30 -0
- wisent/examples/scripts/results/test_hmmt_feb_2025_evaluation.json +30 -0
- wisent/examples/scripts/results/test_hmmt_feb_2025_pairs.json +8 -0
- wisent/examples/scripts/results/test_hmmt_pairs.json +8 -0
- wisent/examples/scripts/results/test_hrm8k_evaluation.json +51 -0
- wisent/examples/scripts/results/test_hrm8k_pairs.json +14 -0
- wisent/examples/scripts/results/test_humaneval_evaluation.json +30 -0
- wisent/examples/scripts/results/test_humaneval_pairs.json +8 -0
- wisent/examples/scripts/results/test_humaneval_plus_evaluation.json +30 -0
- wisent/examples/scripts/results/test_humaneval_plus_pairs.json +8 -0
- wisent/examples/scripts/results/test_ifeval_evaluation.json +30 -0
- wisent/examples/scripts/results/test_ifeval_pairs.json +8 -0
- wisent/examples/scripts/results/test_instruct_humaneval/test_instruct_humaneval_evaluation.json +30 -0
- wisent/examples/scripts/results/test_instruct_humaneval/test_instruct_humaneval_pairs.json +8 -0
- wisent/examples/scripts/results/test_instruct_humaneval_evaluation.json +30 -0
- wisent/examples/scripts/results/test_instruct_humaneval_pairs.json +8 -0
- wisent/examples/scripts/results/test_inverse_scaling_evaluation.json +51 -0
- wisent/examples/scripts/results/test_inverse_scaling_hindsight_neglect_10shot_evaluation.json +30 -0
- wisent/examples/scripts/results/test_inverse_scaling_hindsight_neglect_10shot_pairs.json +8 -0
- wisent/examples/scripts/results/test_inverse_scaling_mc/test_inverse_scaling_mc_evaluation.json +51 -0
- wisent/examples/scripts/results/test_inverse_scaling_mc/test_inverse_scaling_mc_pairs.json +14 -0
- wisent/examples/scripts/results/test_inverse_scaling_pairs.json +14 -0
- wisent/examples/scripts/results/test_iwslt2017-ar-en_evaluation.json +30 -0
- wisent/examples/scripts/results/test_iwslt2017-ar-en_pairs.json +8 -0
- wisent/examples/scripts/results/test_iwslt2017-en-ar_evaluation.json +30 -0
- wisent/examples/scripts/results/test_iwslt2017-en-ar_pairs.json +8 -0
- wisent/examples/scripts/results/test_iwslt2017_ar_en/test_iwslt2017-ar-en_evaluation.json +30 -0
- wisent/examples/scripts/results/test_iwslt2017_ar_en/test_iwslt2017-ar-en_pairs.json +8 -0
- wisent/examples/scripts/results/test_iwslt2017_en_ar/test_iwslt2017-en-ar_evaluation.json +30 -0
- wisent/examples/scripts/results/test_iwslt2017_en_ar/test_iwslt2017-en-ar_pairs.json +8 -0
- wisent/examples/scripts/results/test_iwslt2017_group/test_iwslt2017_evaluation.json +30 -0
- wisent/examples/scripts/results/test_iwslt2017_group/test_iwslt2017_pairs.json +8 -0
- wisent/examples/scripts/results/test_japanese_leaderboard_evaluation.json +51 -0
- wisent/examples/scripts/results/test_japanese_leaderboard_pairs.json +14 -0
- wisent/examples/scripts/results/test_jsonschema_bench/test_jsonschema_bench_evaluation.json +30 -0
- wisent/examples/scripts/results/test_jsonschema_bench/test_jsonschema_bench_pairs.json +8 -0
- wisent/examples/scripts/results/test_jsonschema_bench_evaluation.json +30 -0
- wisent/examples/scripts/results/test_jsonschema_bench_final/test_jsonschema_bench_evaluation.json +30 -0
- wisent/examples/scripts/results/test_jsonschema_bench_final/test_jsonschema_bench_pairs.json +8 -0
- wisent/examples/scripts/results/test_jsonschema_bench_pairs.json +8 -0
- wisent/examples/scripts/results/test_kbl_evaluation.json +51 -0
- wisent/examples/scripts/results/test_kbl_fixed/test_kbl_evaluation.json +51 -0
- wisent/examples/scripts/results/test_kbl_fixed/test_kbl_pairs.json +14 -0
- wisent/examples/scripts/results/test_kbl_pairs.json +14 -0
- wisent/examples/scripts/results/test_kmmlu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_kmmlu_pairs.json +14 -0
- wisent/examples/scripts/results/test_kobest_evaluation.json +51 -0
- wisent/examples/scripts/results/test_kobest_pairs.json +14 -0
- wisent/examples/scripts/results/test_kormedmcqa/test_kormedmcqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_kormedmcqa/test_kormedmcqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_kormedmcqa_dentist/test_kormedmcqa_dentist_evaluation.json +30 -0
- wisent/examples/scripts/results/test_kormedmcqa_dentist/test_kormedmcqa_dentist_pairs.json +8 -0
- wisent/examples/scripts/results/test_kormedmcqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_kormedmcqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_lambada_cloze_evaluation.json +30 -0
- wisent/examples/scripts/results/test_lambada_cloze_pairs.json +8 -0
- wisent/examples/scripts/results/test_lambada_evaluation.json +30 -0
- wisent/examples/scripts/results/test_lambada_final/test_lambada_openai_mt_stablelm_en_evaluation.json +30 -0
- wisent/examples/scripts/results/test_lambada_final/test_lambada_openai_mt_stablelm_en_pairs.json +8 -0
- wisent/examples/scripts/results/test_lambada_multilingual/test_lambada_multilingual_evaluation.json +51 -0
- wisent/examples/scripts/results/test_lambada_multilingual/test_lambada_multilingual_pairs.json +14 -0
- wisent/examples/scripts/results/test_lambada_multilingual_evaluation.json +51 -0
- wisent/examples/scripts/results/test_lambada_multilingual_pairs.json +14 -0
- wisent/examples/scripts/results/test_lambada_multilingual_stablelm_evaluation.json +51 -0
- wisent/examples/scripts/results/test_lambada_multilingual_stablelm_pairs.json +14 -0
- wisent/examples/scripts/results/test_lambada_openai_evaluation.json +30 -0
- wisent/examples/scripts/results/test_lambada_openai_pairs.json +8 -0
- wisent/examples/scripts/results/test_lambada_pairs.json +8 -0
- wisent/examples/scripts/results/test_lambada_stablelm_en_fixed/test_lambada_openai_mt_stablelm_en_evaluation.json +30 -0
- wisent/examples/scripts/results/test_lambada_stablelm_en_fixed/test_lambada_openai_mt_stablelm_en_pairs.json +8 -0
- wisent/examples/scripts/results/test_lambada_stablelm_fixed/test_lambada_openai_mt_stablelm_en_evaluation.json +30 -0
- wisent/examples/scripts/results/test_lambada_stablelm_fixed/test_lambada_openai_mt_stablelm_en_pairs.json +8 -0
- wisent/examples/scripts/results/test_lambada_standard_evaluation.json +30 -0
- wisent/examples/scripts/results/test_lambada_standard_pairs.json +8 -0
- wisent/examples/scripts/results/test_leaderboard_evaluation.json +51 -0
- wisent/examples/scripts/results/test_leaderboard_pairs.json +14 -0
- wisent/examples/scripts/results/test_libra/test_libra_evaluation.json +51 -0
- wisent/examples/scripts/results/test_libra/test_libra_pairs.json +14 -0
- wisent/examples/scripts/results/test_libra_evaluation.json +51 -0
- wisent/examples/scripts/results/test_libra_pairs.json +14 -0
- wisent/examples/scripts/results/test_lingoly_evaluation.json +30 -0
- wisent/examples/scripts/results/test_lingoly_pairs.json +8 -0
- wisent/examples/scripts/results/test_livecodebench_evaluation.json +30 -0
- wisent/examples/scripts/results/test_livecodebench_pairs.json +8 -0
- wisent/examples/scripts/results/test_livemathbench_cnmo_en_evaluation.json +30 -0
- wisent/examples/scripts/results/test_livemathbench_cnmo_en_pairs.json +8 -0
- wisent/examples/scripts/results/test_livemathbench_cnmo_zh_evaluation.json +30 -0
- wisent/examples/scripts/results/test_livemathbench_cnmo_zh_pairs.json +8 -0
- wisent/examples/scripts/results/test_llama_evaluation.json +30 -0
- wisent/examples/scripts/results/test_llama_pairs.json +8 -0
- wisent/examples/scripts/results/test_logiqa2_evaluation.json +30 -0
- wisent/examples/scripts/results/test_logiqa2_pairs.json +8 -0
- wisent/examples/scripts/results/test_logiqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_logiqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_m_mmlu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_m_mmlu_pairs.json +14 -0
- wisent/examples/scripts/results/test_mastermind/test_mastermind_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mastermind/test_mastermind_pairs.json +14 -0
- wisent/examples/scripts/results/test_mastermind_24_easy/test_mastermind_24_easy_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mastermind_24_easy/test_mastermind_24_easy_pairs.json +8 -0
- wisent/examples/scripts/results/test_mastermind_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mastermind_pairs.json +14 -0
- wisent/examples/scripts/results/test_math500_evaluation.json +30 -0
- wisent/examples/scripts/results/test_math500_pairs.json +8 -0
- wisent/examples/scripts/results/test_math_evaluation.json +30 -0
- wisent/examples/scripts/results/test_math_pairs.json +8 -0
- wisent/examples/scripts/results/test_mathqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mathqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_mbpp_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mbpp_pairs.json +8 -0
- wisent/examples/scripts/results/test_mbpp_plus_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mbpp_plus_pairs.json +8 -0
- wisent/examples/scripts/results/test_mc_taco_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mc_taco_pairs.json +8 -0
- wisent/examples/scripts/results/test_med_concepts_qa/test_med_concepts_qa_evaluation.json +51 -0
- wisent/examples/scripts/results/test_med_concepts_qa/test_med_concepts_qa_pairs.json +14 -0
- wisent/examples/scripts/results/test_med_concepts_qa_atc_easy/test_med_concepts_qa_atc_easy_evaluation.json +30 -0
- wisent/examples/scripts/results/test_med_concepts_qa_atc_easy/test_med_concepts_qa_atc_easy_pairs.json +8 -0
- wisent/examples/scripts/results/test_med_concepts_qa_evaluation.json +51 -0
- wisent/examples/scripts/results/test_med_concepts_qa_pairs.json +14 -0
- wisent/examples/scripts/results/test_meddialog_evaluation.json +30 -0
- wisent/examples/scripts/results/test_meddialog_pairs.json +8 -0
- wisent/examples/scripts/results/test_meddialog_raw_perplexity/test_meddialog_raw_perplexity_evaluation.json +30 -0
- wisent/examples/scripts/results/test_meddialog_raw_perplexity/test_meddialog_raw_perplexity_pairs.json +8 -0
- wisent/examples/scripts/results/test_mediqa_qa2019_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mediqa_qa2019_pairs.json +8 -0
- wisent/examples/scripts/results/test_medmcqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_medmcqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_medqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_medqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_medtext_evaluation.json +30 -0
- wisent/examples/scripts/results/test_medtext_pairs.json +8 -0
- wisent/examples/scripts/results/test_mela_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mela_pairs.json +14 -0
- wisent/examples/scripts/results/test_meqsum_evaluation.json +30 -0
- wisent/examples/scripts/results/test_meqsum_pairs.json +8 -0
- wisent/examples/scripts/results/test_mercury_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mercury_pairs.json +8 -0
- wisent/examples/scripts/results/test_metabench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_metabench_pairs.json +14 -0
- wisent/examples/scripts/results/test_mgsm_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mgsm_pairs.json +14 -0
- wisent/examples/scripts/results/test_mimic_repsum_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mimic_repsum_pairs.json +8 -0
- wisent/examples/scripts/results/test_minerva_math_evaluation.json +51 -0
- wisent/examples/scripts/results/test_minerva_math_pairs.json +14 -0
- wisent/examples/scripts/results/test_mlqa_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mlqa_pairs.json +14 -0
- wisent/examples/scripts/results/test_mmlu-pro-plus_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mmlu-pro-plus_pairs.json +14 -0
- wisent/examples/scripts/results/test_mmlu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mmlu_pairs.json +14 -0
- wisent/examples/scripts/results/test_mmlu_pro_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mmlu_pro_pairs.json +14 -0
- wisent/examples/scripts/results/test_mmlu_prox_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mmlu_prox_pairs.json +14 -0
- wisent/examples/scripts/results/test_mmlusr_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mmlusr_pairs.json +8 -0
- wisent/examples/scripts/results/test_mmmu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_mmmu_pairs.json +14 -0
- wisent/examples/scripts/results/test_mnli_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mnli_pairs.json +8 -0
- wisent/examples/scripts/results/test_model_written_evals_evaluation.json +51 -0
- wisent/examples/scripts/results/test_model_written_evals_pairs.json +14 -0
- wisent/examples/scripts/results/test_moral_stories_evaluation.json +30 -0
- wisent/examples/scripts/results/test_moral_stories_pairs.json +8 -0
- wisent/examples/scripts/results/test_mts_dialog_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mts_dialog_pairs.json +8 -0
- wisent/examples/scripts/results/test_multiblimp_evaluation.json +51 -0
- wisent/examples/scripts/results/test_multiblimp_pairs.json +14 -0
- wisent/examples/scripts/results/test_multimedqa_evaluation.json +51 -0
- wisent/examples/scripts/results/test_multimedqa_pairs.json +14 -0
- wisent/examples/scripts/results/test_multipl_e_evaluation.json +30 -0
- wisent/examples/scripts/results/test_multipl_e_pairs.json +8 -0
- wisent/examples/scripts/results/test_mutual_evaluation.json +30 -0
- wisent/examples/scripts/results/test_mutual_pairs.json +8 -0
- wisent/examples/scripts/results/test_non_greedy_robustness_agieval_aqua_rat_evaluation.json +30 -0
- wisent/examples/scripts/results/test_non_greedy_robustness_agieval_aqua_rat_pairs.json +8 -0
- wisent/examples/scripts/results/test_noreval_evaluation.json +51 -0
- wisent/examples/scripts/results/test_noreval_pairs.json +14 -0
- wisent/examples/scripts/results/test_noticia_evaluation.json +30 -0
- wisent/examples/scripts/results/test_noticia_pairs.json +8 -0
- wisent/examples/scripts/results/test_nq_open_evaluation.json +30 -0
- wisent/examples/scripts/results/test_nq_open_pairs.json +8 -0
- wisent/examples/scripts/results/test_olaph_evaluation.json +30 -0
- wisent/examples/scripts/results/test_olaph_pairs.json +8 -0
- wisent/examples/scripts/results/test_openbookqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_openbookqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_openllm_evaluation.json +51 -0
- wisent/examples/scripts/results/test_openllm_pairs.json +14 -0
- wisent/examples/scripts/results/test_option_order_robustness_agieval_aqua_rat_evaluation.json +30 -0
- wisent/examples/scripts/results/test_option_order_robustness_agieval_aqua_rat_pairs.json +8 -0
- wisent/examples/scripts/results/test_paloma_evaluation.json +51 -0
- wisent/examples/scripts/results/test_paloma_pairs.json +14 -0
- wisent/examples/scripts/results/test_passkey/test_passkey_evaluation.json +30 -0
- wisent/examples/scripts/results/test_passkey/test_passkey_pairs.json +8 -0
- wisent/examples/scripts/results/test_paws-x_evaluation.json +51 -0
- wisent/examples/scripts/results/test_paws-x_pairs.json +14 -0
- wisent/examples/scripts/results/test_paws_en/test_paws_en_evaluation.json +30 -0
- wisent/examples/scripts/results/test_paws_en/test_paws_en_pairs.json +8 -0
- wisent/examples/scripts/results/test_penn_treebank_evaluation.json +30 -0
- wisent/examples/scripts/results/test_penn_treebank_pairs.json +8 -0
- wisent/examples/scripts/results/test_pile_10k/test_pile_10k_evaluation.json +30 -0
- wisent/examples/scripts/results/test_pile_10k/test_pile_10k_pairs.json +8 -0
- wisent/examples/scripts/results/test_piqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_piqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_polemo2_evaluation.json +30 -0
- wisent/examples/scripts/results/test_polemo2_pairs.json +8 -0
- wisent/examples/scripts/results/test_polymath_en_high_evaluation.json +30 -0
- wisent/examples/scripts/results/test_polymath_en_high_pairs.json +8 -0
- wisent/examples/scripts/results/test_polymath_en_medium_evaluation.json +30 -0
- wisent/examples/scripts/results/test_polymath_en_medium_pairs.json +8 -0
- wisent/examples/scripts/results/test_polymath_zh_high_evaluation.json +30 -0
- wisent/examples/scripts/results/test_polymath_zh_high_pairs.json +8 -0
- wisent/examples/scripts/results/test_polymath_zh_medium_evaluation.json +30 -0
- wisent/examples/scripts/results/test_polymath_zh_medium_pairs.json +8 -0
- wisent/examples/scripts/results/test_portuguese_bench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_portuguese_bench_pairs.json +14 -0
- wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat/test_prompt_robustness_agieval_aqua_rat_evaluation.json +30 -0
- wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat/test_prompt_robustness_agieval_aqua_rat_pairs.json +8 -0
- wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat_evaluation.json +30 -0
- wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat_pairs.json +8 -0
- wisent/examples/scripts/results/test_prost_evaluation.json +30 -0
- wisent/examples/scripts/results/test_prost_pairs.json +8 -0
- wisent/examples/scripts/results/test_ptb_evaluation.json +30 -0
- wisent/examples/scripts/results/test_ptb_pairs.json +8 -0
- wisent/examples/scripts/results/test_pubmedqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_pubmedqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_pythia_evaluation.json +51 -0
- wisent/examples/scripts/results/test_pythia_pairs.json +14 -0
- wisent/examples/scripts/results/test_qa4mre_evaluation.json +30 -0
- wisent/examples/scripts/results/test_qa4mre_pairs.json +8 -0
- wisent/examples/scripts/results/test_qasper_evaluation.json +30 -0
- wisent/examples/scripts/results/test_qasper_pairs.json +8 -0
- wisent/examples/scripts/results/test_race_evaluation.json +30 -0
- wisent/examples/scripts/results/test_race_pairs.json +8 -0
- wisent/examples/scripts/results/test_realtoxicityprompts_evaluation.json +30 -0
- wisent/examples/scripts/results/test_realtoxicityprompts_pairs.json +8 -0
- wisent/examples/scripts/results/test_recode_evaluation.json +30 -0
- wisent/examples/scripts/results/test_recode_pairs.json +8 -0
- wisent/examples/scripts/results/test_record_evaluation.json +30 -0
- wisent/examples/scripts/results/test_record_pairs.json +8 -0
- wisent/examples/scripts/results/test_ruler_evaluation.json +51 -0
- wisent/examples/scripts/results/test_ruler_pairs.json +14 -0
- wisent/examples/scripts/results/test_sciq_evaluation.json +30 -0
- wisent/examples/scripts/results/test_sciq_pairs.json +8 -0
- wisent/examples/scripts/results/test_score_evaluation.json +51 -0
- wisent/examples/scripts/results/test_score_pairs.json +14 -0
- wisent/examples/scripts/results/test_self_consistency_evaluation.json +30 -0
- wisent/examples/scripts/results/test_self_consistency_pairs.json +8 -0
- wisent/examples/scripts/results/test_siqa/test_siqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_siqa/test_siqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_siqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_siqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_spanish_bench_evaluation.json +51 -0
- wisent/examples/scripts/results/test_spanish_bench_pairs.json +14 -0
- wisent/examples/scripts/results/test_squad2_evaluation.json +30 -0
- wisent/examples/scripts/results/test_squad2_pairs.json +8 -0
- wisent/examples/scripts/results/test_squadv2_evaluation.json +30 -0
- wisent/examples/scripts/results/test_squadv2_pairs.json +8 -0
- wisent/examples/scripts/results/test_super-glue-lm-eval-v1-seq2seq_evaluation.json +30 -0
- wisent/examples/scripts/results/test_super-glue-lm-eval-v1-seq2seq_pairs.json +8 -0
- wisent/examples/scripts/results/test_super-glue-lm-eval-v1_evaluation.json +51 -0
- wisent/examples/scripts/results/test_super-glue-lm-eval-v1_pairs.json +14 -0
- wisent/examples/scripts/results/test_swag_evaluation.json +30 -0
- wisent/examples/scripts/results/test_swag_pairs.json +8 -0
- wisent/examples/scripts/results/test_tinyBenchmarks_evaluation.json +51 -0
- wisent/examples/scripts/results/test_tinyBenchmarks_pairs.json +14 -0
- wisent/examples/scripts/results/test_tmmluplus_evaluation.json +51 -0
- wisent/examples/scripts/results/test_tmmluplus_pairs.json +14 -0
- wisent/examples/scripts/results/test_translation_evaluation.json +51 -0
- wisent/examples/scripts/results/test_translation_pairs.json +14 -0
- wisent/examples/scripts/results/test_triviaqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_triviaqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_truthfulqa-multi_evaluation.json +51 -0
- wisent/examples/scripts/results/test_truthfulqa-multi_pairs.json +14 -0
- wisent/examples/scripts/results/test_truthfulqa_evaluation.json +30 -0
- wisent/examples/scripts/results/test_truthfulqa_mc1_evaluation.json +30 -0
- wisent/examples/scripts/results/test_truthfulqa_mc1_pairs.json +8 -0
- wisent/examples/scripts/results/test_truthfulqa_mc2_evaluation.json +30 -0
- wisent/examples/scripts/results/test_truthfulqa_mc2_pairs.json +8 -0
- wisent/examples/scripts/results/test_truthfulqa_pairs.json +8 -0
- wisent/examples/scripts/results/test_turkishmmlu_evaluation.json +51 -0
- wisent/examples/scripts/results/test_turkishmmlu_pairs.json +14 -0
- wisent/examples/scripts/results/test_unfair_tos_evaluation.json +30 -0
- wisent/examples/scripts/results/test_unfair_tos_pairs.json +8 -0
- wisent/examples/scripts/results/test_unscramble_evaluation.json +51 -0
- wisent/examples/scripts/results/test_unscramble_pairs.json +14 -0
- wisent/examples/scripts/results/test_webqs_evaluation.json +30 -0
- wisent/examples/scripts/results/test_webqs_pairs.json +8 -0
- wisent/examples/scripts/results/test_wikitext103_evaluation.json +30 -0
- wisent/examples/scripts/results/test_wikitext103_pairs.json +8 -0
- wisent/examples/scripts/results/test_wikitext_evaluation.json +30 -0
- wisent/examples/scripts/results/test_wikitext_pairs.json +8 -0
- wisent/examples/scripts/results/test_winogender_evaluation.json +51 -0
- wisent/examples/scripts/results/test_winogender_pairs.json +14 -0
- wisent/examples/scripts/results/test_winogrande_evaluation.json +30 -0
- wisent/examples/scripts/results/test_winogrande_pairs.json +8 -0
- wisent/examples/scripts/results/test_wmdp_evaluation.json +30 -0
- wisent/examples/scripts/results/test_wmdp_pairs.json +8 -0
- wisent/examples/scripts/results/test_wmt-ro-en-t5-prompt_evaluation.json +30 -0
- wisent/examples/scripts/results/test_wmt-ro-en-t5-prompt_pairs.json +8 -0
- wisent/examples/scripts/results/test_wmt14_en_fr_evaluation.json +30 -0
- wisent/examples/scripts/results/test_wmt14_en_fr_pairs.json +8 -0
- wisent/examples/scripts/results/test_wmt16_en_de_evaluation.json +30 -0
- wisent/examples/scripts/results/test_wmt16_en_de_pairs.json +8 -0
- wisent/examples/scripts/results/test_wmt16_ro_en_evaluation.json +30 -0
- wisent/examples/scripts/results/test_wmt16_ro_en_pairs.json +8 -0
- wisent/examples/scripts/results/test_wsc273_evaluation.json +30 -0
- wisent/examples/scripts/results/test_wsc273_pairs.json +8 -0
- wisent/examples/scripts/results/test_xcopa_evaluation.json +51 -0
- wisent/examples/scripts/results/test_xcopa_pairs.json +14 -0
- wisent/examples/scripts/results/test_xnli_eu_evaluation.json +30 -0
- wisent/examples/scripts/results/test_xnli_eu_pairs.json +8 -0
- wisent/examples/scripts/results/test_xnli_evaluation.json +51 -0
- wisent/examples/scripts/results/test_xnli_pairs.json +14 -0
- wisent/examples/scripts/results/test_xquad_evaluation.json +51 -0
- wisent/examples/scripts/results/test_xquad_pairs.json +14 -0
- wisent/examples/scripts/results/test_xstorycloze_evaluation.json +51 -0
- wisent/examples/scripts/results/test_xstorycloze_pairs.json +14 -0
- wisent/examples/scripts/results/test_xsum_evaluation.json +30 -0
- wisent/examples/scripts/results/test_xsum_pairs.json +8 -0
- wisent/examples/scripts/results/test_xwinograd_evaluation.json +51 -0
- wisent/examples/scripts/results/test_xwinograd_pairs.json +14 -0
- wisent/examples/scripts/results/test_yahoo_answers_topics_evaluation.json +30 -0
- wisent/examples/scripts/results/test_yahoo_answers_topics_pairs.json +8 -0
- wisent/parameters/__init__.py +1 -0
- wisent/parameters/lm_eval/all_lm_eval_task_families.json +169 -0
- wisent/parameters/lm_eval/broken_in_lm_eval.json +10 -0
- wisent/parameters/lm_eval/evaluations_not_lm_eval_tasks.json +0 -0
- wisent/parameters/lm_eval/evaluator_check.json +3476 -0
- wisent/parameters/lm_eval/final_verification.json +24782 -0
- wisent/parameters/lm_eval/group_task_evaluators.json +1833 -0
- wisent/parameters/lm_eval/group_tasks.json +150 -0
- wisent/parameters/lm_eval/individual_tasks.json +402 -0
- wisent/parameters/lm_eval/no_readmes.json +1 -0
- wisent/parameters/lm_eval/not_lm_eval_tasks.json +110 -0
- wisent/parameters/lm_eval/read_tasks.json +208 -0
- wisent/parameters/lm_eval/readme_files.json +208 -0
- wisent/parameters/lm_eval/track_progress_not_lm_eval_tasks.json +128 -0
- wisent/parameters/tasks/missing_task_families.json +2963 -0
- wisent/parameters/tasks/remaining_tasks_to_implement.json +199 -0
- wisent/parameters/tasks/risks.json +10 -0
- wisent/parameters/tasks/skills.json +14 -0
- wisent/parameters/tasks/tasks.json +56031 -0
- wisent/scripts/run_quality_metrics_sweep.sh +315 -0
- wisent/tests/__init__.py +0 -0
- wisent/tests/examples/__init__.py +0 -0
- wisent/tests/examples/cli/__init__.py +0 -0
- wisent/tests/examples/cli/activations/__init__.py +0 -0
- wisent/tests/examples/cli/activations/test_get_activations.py +127 -0
- wisent/tests/examples/cli/classifier/__init__.py +0 -0
- wisent/tests/examples/cli/classifier/test_classifier_examples.py +141 -0
- wisent/tests/examples/cli/contrastive_pairs/__init__.py +0 -0
- wisent/tests/examples/cli/contrastive_pairs/test_generate_pairs.py +89 -0
- wisent/tests/examples/cli/evaluation/__init__.py +0 -0
- wisent/tests/examples/cli/evaluation/test_evaluation_examples.py +117 -0
- wisent/tests/examples/cli/generate/__init__.py +0 -0
- wisent/tests/examples/cli/generate/test_generate_with_classifier.py +146 -0
- wisent/tests/examples/cli/generate/test_generate_with_steering.py +149 -0
- wisent/tests/examples/cli/generate/test_only_generate.py +110 -0
- wisent/tests/examples/cli/multi_steering/__init__.py +0 -0
- wisent/tests/examples/cli/multi_steering/test_multi_steer_from_trained_vectors.py +210 -0
- wisent/tests/examples/cli/multi_steering/test_multi_steer_with_different_parameters.py +205 -0
- wisent/tests/examples/cli/multi_steering/test_train_and_multi_steer.py +174 -0
- wisent/tests/examples/cli/optimizer/__init__.py +0 -0
- wisent/tests/examples/cli/optimizer/test_optimize_sample_size.py +102 -0
- wisent/tests/examples/cli/optimizer/test_optimizer_examples.py +59 -0
- wisent/tests/examples/cli/steering/__init__.py +0 -0
- wisent/tests/examples/cli/steering/test_create_steering_vectors.py +135 -0
- wisent/tests/examples/cli/synthetic/__init__.py +0 -0
- wisent/tests/examples/cli/synthetic/test_synthetic_pairs.py +45 -0
- wisent/tests/nosense/__init__.py +6 -0
- wisent/tests/nosense/base_nosense.py +81 -0
- wisent/tests/nosense/math500_nosense.py +72 -0
- wisent/tests/nosense/test_robustness.py +336 -0
- wisent/tests/test_all_cli_commands.py +674 -0
- wisent/tests/test_geometry_comprehensive.py +327 -0
- wisent/tests/test_titan_geometry.py +257 -0
- wisent/tests/visualize_geometry.py +148 -0
- wisent-0.7.379.dist-info/METADATA +64 -0
- wisent-0.7.379.dist-info/RECORD +1720 -0
- wisent-0.7.379.dist-info/WHEEL +5 -0
- wisent-0.7.379.dist-info/entry_points.txt +2 -0
- wisent-0.7.379.dist-info/licenses/LICENSE +21 -0
- wisent-0.7.379.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1655 @@
|
|
|
1
|
+
"""Diagnostics for steering/control vectors."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import statistics
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Mapping, List, Dict, Any, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
import torch
|
|
10
|
+
import torch.nn.functional as F
|
|
11
|
+
|
|
12
|
+
from wisent.core.activations.core.atoms import LayerActivations, RawActivationMap
|
|
13
|
+
|
|
14
|
+
from .base import DiagnosticsIssue, DiagnosticsReport, MetricReport
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"ControlVectorDiagnosticsConfig",
|
|
18
|
+
"run_control_vector_diagnostics",
|
|
19
|
+
"run_control_steering_diagnostics",
|
|
20
|
+
"ConeAnalysisConfig",
|
|
21
|
+
"ConeAnalysisResult",
|
|
22
|
+
"check_cone_structure",
|
|
23
|
+
"GeometryAnalysisConfig",
|
|
24
|
+
"GeometryAnalysisResult",
|
|
25
|
+
"StructureType",
|
|
26
|
+
"detect_geometry_structure",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(slots=True)
|
|
31
|
+
class ControlVectorDiagnosticsConfig:
|
|
32
|
+
"""Thresholds and options for control vector diagnostics."""
|
|
33
|
+
|
|
34
|
+
min_norm: float = 1e-4
|
|
35
|
+
max_norm: float | None = None
|
|
36
|
+
zero_value_threshold: float = 1e-8
|
|
37
|
+
max_zero_fraction: float = 0.999
|
|
38
|
+
warn_on_missing: bool = True
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _to_layer_activations(vectors: LayerActivations | RawActivationMap | Mapping[str, object] | None) -> LayerActivations:
    """Coerce the accepted vector inputs into a ``LayerActivations`` object.

    An existing ``LayerActivations`` is handed back untouched. Anything else
    is wrapped in a new ``LayerActivations``; a falsy input (``None`` or an
    empty mapping) is replaced by an empty dict first.
    """
    if not isinstance(vectors, LayerActivations):
        raw_map: RawActivationMap = vectors or {}
        return LayerActivations(raw_map)
    return vectors
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def run_control_vector_diagnostics(
    vectors: LayerActivations | RawActivationMap | Mapping[str, object] | None,
    config: ControlVectorDiagnosticsConfig | None = None,
) -> DiagnosticsReport:
    """Check each layer's steering/control vector and collect the findings.

    Every vector that is present is flattened to float32 on the CPU and
    tested for emptiness, non-finite entries, L2 norm bounds and the share
    of near-zero entries. Layers that are missing, empty or non-finite are
    reported and left out of the summary statistics.

    Arguments:
        vectors: Per-layer vectors, either already wrapped in
            ``LayerActivations`` or given as a raw mapping.
        config: Thresholds to apply; defaults are used when omitted.

    Returns:
        A ``DiagnosticsReport`` built from a single ``control_vectors``
        metric report carrying the summary and every issue found.
    """
    cfg = config or ControlVectorDiagnosticsConfig()
    layer_vectors = _to_layer_activations(vectors).to_dict()

    issues: list[DiagnosticsIssue] = []
    norms: list[float] = []
    zero_fractions: list[float] = []
    per_layer: dict[str, dict[str, float]] = {}

    def flag(severity: str, message: str, **details: object) -> None:
        # All findings of this check share the same metric name.
        issues.append(
            DiagnosticsIssue(
                metric="control_vectors",
                severity=severity,
                message=message,
                details=details,
            )
        )

    for layer, tensor in layer_vectors.items():
        if tensor is None:
            if cfg.warn_on_missing:
                flag("warning", f"Layer {layer} has no control vector", layer=layer)
            continue

        detached = tensor.detach()
        if detached.numel() == 0:
            flag("critical", f"Layer {layer} control vector is empty", layer=layer)
            continue

        flat = detached.to(dtype=torch.float32, device="cpu").reshape(-1)

        finite_mask = torch.isfinite(flat)
        if not finite_mask.all():
            bad_entries = (~finite_mask).sum().item()
            flag(
                "critical",
                f"Layer {layer} contains non-finite values",
                layer=layer,
                non_finite_entries=int(bad_entries),
            )
            continue

        norm_value = float(torch.linalg.vector_norm(flat).item())
        zero_fraction = float((flat.abs() <= cfg.zero_value_threshold).sum().item()) / float(flat.numel())

        norms.append(norm_value)
        zero_fractions.append(zero_fraction)
        per_layer[layer] = {
            "norm": norm_value,
            "zero_fraction": zero_fraction,
        }

        if norm_value < cfg.min_norm:
            flag(
                "critical",
                f"Layer {layer} control vector norm {norm_value:.3e} below minimum {cfg.min_norm}",
                layer=layer,
                norm=norm_value,
            )

        if cfg.max_norm is not None and norm_value > cfg.max_norm:
            flag(
                "warning",
                f"Layer {layer} control vector norm {norm_value:.3e} exceeds maximum {cfg.max_norm}",
                layer=layer,
                norm=norm_value,
            )

        if zero_fraction >= cfg.max_zero_fraction:
            # A vector that is (numerically) entirely zero is critical,
            # a merely very sparse one is only a warning.
            all_zero = zero_fraction >= 1.0 - 1e-9
            flag(
                "critical" if all_zero else "warning",
                f"Layer {layer} control vector is {zero_fraction:.3%} zero-valued, exceeding allowed {cfg.max_zero_fraction:.3%}",
                layer=layer,
                zero_fraction=zero_fraction,
            )

    have_norms = bool(norms)
    summary: dict[str, object] = {
        "evaluated_layers": len(norms),
        "norm_min": min(norms) if have_norms else None,
        "norm_max": max(norms) if have_norms else None,
        "norm_mean": statistics.mean(norms) if have_norms else None,
        "norm_median": statistics.median(norms) if have_norms else None,
        "zero_fraction_max": max(zero_fractions) if zero_fractions else None,
        "per_layer": per_layer,
    }

    # Nothing evaluated and nothing reported means there was no usable input.
    if not have_norms and not issues:
        flag("critical", "No control vectors were provided for diagnostics")

    report = MetricReport(name="control_vectors", summary=summary, issues=issues)
    return DiagnosticsReport.from_metrics([report])
|
|
167
|
+
|
|
168
|
+
def run_control_steering_diagnostics(steering_vectors: list[RawActivationMap] | RawActivationMap | None) -> list[DiagnosticsReport]:
    """Run the control vector diagnostics on one or several vector maps.

    ``None`` yields a single report built from no metrics. A value that is
    not a ``list`` is treated as one vector map. Otherwise one report is
    produced per list element, in order.
    """
    if steering_vectors is None:
        return [DiagnosticsReport.from_metrics([])]

    candidates = steering_vectors if isinstance(steering_vectors, list) else [steering_vectors]
    return [run_control_vector_diagnostics(candidate) for candidate in candidates]
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@dataclass
class ConeAnalysisConfig:
    """Settings that control the cone structure analysis."""

    # Number of directions to discover in the cone.
    num_directions: int = 5
    # Gradient steps used when optimizing the cone directions.
    optimization_steps: int = 100
    # Learning rate of that optimization.
    learning_rate: float = 0.01
    # Lower bound on the cosine similarity between cone directions
    # (should be positive).
    min_cosine_similarity: float = 0.2
    # Upper bound on the cosine similarity, to avoid redundant directions.
    max_cosine_similarity: float = 0.95
    # Number of PCA components the cone is compared against.
    pca_components: int = 5
    # Minimum cone_score at which a cone structure is declared.
    cone_threshold: float = 0.7
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@dataclass
class ConeAnalysisResult:
    """Outcome of a cone structure analysis."""

    # True when a cone structure was detected.
    has_cone_structure: bool
    # Cone-ness score between 0 and 1 (1 = perfect cone).
    cone_score: float
    # Variance explained by the PCA directions.
    pca_explained_variance: float
    # Variance explained by the cone directions.
    cone_explained_variance: float
    # Number of valid cone directions that were discovered.
    num_directions_found: int
    # Pairwise cosine similarities between the discovered directions.
    direction_cosine_similarities: List[List[float]]
    # Mean pairwise cosine similarity (higher = more cone-like).
    avg_cosine_similarity: float
    # Fraction of directions in the same half-space as the primary one
    # (1.0 = perfect cone).
    half_space_consistency: float
    # Separation between positive and negative activations, per direction.
    separation_scores: List[float]
    # How well positive activations can be written as positive combinations.
    positive_combination_score: float
    # Additional diagnostic details.
    details: Dict[str, Any] = field(default_factory=dict)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def check_cone_structure(
    pos_activations: torch.Tensor,
    neg_activations: torch.Tensor,
    config: ConeAnalysisConfig | None = None,
) -> ConeAnalysisResult:
    """
    Analyze whether activations form a cone structure vs linear subspace.

    A cone structure implies:
    1. Multiple directions mediate the behavior (not just one)
    2. These directions are positively correlated (same half-space)
    3. The behavior can be achieved by positive combinations of directions
    4. Cone explains variance better than or comparable to PCA

    Arguments:
        pos_activations: Positive example activations [N_pos, hidden_dim]
        neg_activations: Negative example activations [N_neg, hidden_dim]
        config: Analysis configuration

    Returns:
        ConeAnalysisResult with cone detection metrics. ``details["config"]``
        is a snapshot copy of the configuration, so editing it does not
        alter the config object that was passed in.
    """
    cfg = config or ConeAnalysisConfig()

    pos_tensor = pos_activations.detach().float()
    neg_tensor = neg_activations.detach().float()

    # A single activation vector is treated as a batch of one.
    if pos_tensor.dim() == 1:
        pos_tensor = pos_tensor.unsqueeze(0)
    if neg_tensor.dim() == 1:
        neg_tensor = neg_tensor.unsqueeze(0)

    # 1. PCA Analysis - find linear directions
    pca_directions, pca_explained = _compute_pca_directions(
        pos_tensor, neg_tensor, n_components=cfg.pca_components
    )

    # 2. Cone Direction Discovery - gradient-based optimization
    cone_directions, cone_metadata = _discover_cone_directions(
        pos_tensor, neg_tensor,
        num_directions=cfg.num_directions,
        optimization_steps=cfg.optimization_steps,
        learning_rate=cfg.learning_rate,
        min_cos_sim=cfg.min_cosine_similarity,
        max_cos_sim=cfg.max_cosine_similarity,
    )

    # 3. Compute cone explained variance
    cone_explained = _compute_cone_explained_variance(
        pos_tensor, neg_tensor, cone_directions
    )

    # 4. Half-space consistency check
    half_space_score = _check_half_space_consistency(cone_directions)

    # 5. Positive combination test
    pos_combo_score = _test_positive_combinations(
        pos_tensor, neg_tensor, cone_directions
    )

    # 6. Compute cosine similarity matrix
    cos_sim_matrix = _compute_cosine_similarity_matrix(cone_directions)
    avg_cos_sim = _compute_avg_off_diagonal(cos_sim_matrix)

    # 7. Separation scores per direction
    separation_scores = _compute_separation_scores(
        pos_tensor, neg_tensor, cone_directions
    )

    # 8. Compute final cone score
    cone_score = _compute_cone_score(
        pca_explained=pca_explained,
        cone_explained=cone_explained,
        half_space_score=half_space_score,
        avg_cos_sim=avg_cos_sim,
        pos_combo_score=pos_combo_score,
        separation_scores=separation_scores,
    )

    has_cone = cone_score >= cfg.cone_threshold

    return ConeAnalysisResult(
        has_cone_structure=has_cone,
        cone_score=cone_score,
        pca_explained_variance=pca_explained,
        cone_explained_variance=cone_explained,
        num_directions_found=cone_directions.shape[0],
        direction_cosine_similarities=cos_sim_matrix.tolist(),
        avg_cosine_similarity=avg_cos_sim,
        half_space_consistency=half_space_score,
        separation_scores=separation_scores,
        positive_combination_score=pos_combo_score,
        details={
            # Copy instead of exposing the live attribute dict of ``cfg``.
            "config": dict(cfg.__dict__),
            "cone_metadata": cone_metadata,
            "pca_directions_shape": list(pca_directions.shape),
            "cone_directions_shape": list(cone_directions.shape),
        },
    )
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _compute_pca_directions(
|
|
351
|
+
pos_tensor: torch.Tensor,
|
|
352
|
+
neg_tensor: torch.Tensor,
|
|
353
|
+
n_components: int,
|
|
354
|
+
) -> Tuple[torch.Tensor, float]:
|
|
355
|
+
"""Compute PCA directions and explained variance ratio."""
|
|
356
|
+
# Combine all activations
|
|
357
|
+
all_activations = torch.cat([pos_tensor, neg_tensor], dim=0)
|
|
358
|
+
|
|
359
|
+
# Center the data
|
|
360
|
+
mean = all_activations.mean(dim=0, keepdim=True)
|
|
361
|
+
centered = all_activations - mean
|
|
362
|
+
|
|
363
|
+
# SVD for PCA
|
|
364
|
+
try:
|
|
365
|
+
U, S, Vh = torch.linalg.svd(centered, full_matrices=False)
|
|
366
|
+
|
|
367
|
+
# Get top k directions
|
|
368
|
+
k = min(n_components, Vh.shape[0])
|
|
369
|
+
pca_directions = Vh[:k] # [k, hidden_dim]
|
|
370
|
+
|
|
371
|
+
# Explained variance ratio
|
|
372
|
+
total_var = (S ** 2).sum()
|
|
373
|
+
explained_var = (S[:k] ** 2).sum() / total_var if total_var > 0 else 0.0
|
|
374
|
+
|
|
375
|
+
return pca_directions, float(explained_var)
|
|
376
|
+
except Exception:
|
|
377
|
+
# Fallback if SVD fails
|
|
378
|
+
return torch.zeros(n_components, pos_tensor.shape[1]), 0.0
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def _discover_cone_directions(
|
|
382
|
+
pos_tensor: torch.Tensor,
|
|
383
|
+
neg_tensor: torch.Tensor,
|
|
384
|
+
num_directions: int,
|
|
385
|
+
optimization_steps: int,
|
|
386
|
+
learning_rate: float,
|
|
387
|
+
min_cos_sim: float,
|
|
388
|
+
max_cos_sim: float,
|
|
389
|
+
) -> Tuple[torch.Tensor, Dict[str, Any]]:
|
|
390
|
+
"""
|
|
391
|
+
Discover cone directions via gradient optimization.
|
|
392
|
+
Similar to PRISM but focused on cone structure detection.
|
|
393
|
+
"""
|
|
394
|
+
hidden_dim = pos_tensor.shape[1]
|
|
395
|
+
|
|
396
|
+
# Initialize with CAA direction first, then random perturbations
|
|
397
|
+
caa_dir = pos_tensor.mean(dim=0) - neg_tensor.mean(dim=0)
|
|
398
|
+
caa_dir = F.normalize(caa_dir, p=2, dim=0)
|
|
399
|
+
|
|
400
|
+
directions = torch.randn(num_directions, hidden_dim)
|
|
401
|
+
directions[0] = caa_dir
|
|
402
|
+
|
|
403
|
+
for i in range(1, num_directions):
|
|
404
|
+
noise = torch.randn(hidden_dim) * 0.3
|
|
405
|
+
directions[i] = F.normalize(caa_dir + noise, p=2, dim=0)
|
|
406
|
+
|
|
407
|
+
directions = F.normalize(directions, p=2, dim=1)
|
|
408
|
+
directions.requires_grad_(True)
|
|
409
|
+
|
|
410
|
+
optimizer = torch.optim.Adam([directions], lr=learning_rate)
|
|
411
|
+
|
|
412
|
+
training_losses = []
|
|
413
|
+
|
|
414
|
+
for step in range(optimization_steps):
|
|
415
|
+
optimizer.zero_grad()
|
|
416
|
+
|
|
417
|
+
# Normalize for computation
|
|
418
|
+
dirs_norm = F.normalize(directions, p=2, dim=1)
|
|
419
|
+
|
|
420
|
+
# Loss 1: Separation - each direction should separate pos from neg
|
|
421
|
+
pos_proj = pos_tensor @ dirs_norm.T # [N_pos, K]
|
|
422
|
+
neg_proj = neg_tensor @ dirs_norm.T # [N_neg, K]
|
|
423
|
+
separation_loss = -((pos_proj.mean(dim=0) - neg_proj.mean(dim=0)).abs().mean())
|
|
424
|
+
|
|
425
|
+
# Loss 2: Cone constraint - directions should be positively correlated
|
|
426
|
+
cos_sim = dirs_norm @ dirs_norm.T
|
|
427
|
+
off_diag_mask = 1 - torch.eye(num_directions)
|
|
428
|
+
off_diag = cos_sim * off_diag_mask
|
|
429
|
+
|
|
430
|
+
# Penalize negative correlations (not a cone)
|
|
431
|
+
negative_penalty = F.relu(-off_diag).sum()
|
|
432
|
+
|
|
433
|
+
# Penalize too similar (redundant)
|
|
434
|
+
too_similar = F.relu(off_diag - max_cos_sim).sum()
|
|
435
|
+
|
|
436
|
+
# Penalize too dissimilar (not a cone)
|
|
437
|
+
too_dissimilar = F.relu(min_cos_sim - off_diag).sum()
|
|
438
|
+
|
|
439
|
+
cone_loss = negative_penalty + too_similar + 0.5 * too_dissimilar
|
|
440
|
+
|
|
441
|
+
# Loss 3: Diversity - directions should capture different aspects
|
|
442
|
+
diversity_loss = -off_diag.var()
|
|
443
|
+
|
|
444
|
+
total_loss = separation_loss + 0.5 * cone_loss + 0.1 * diversity_loss
|
|
445
|
+
|
|
446
|
+
total_loss.backward()
|
|
447
|
+
optimizer.step()
|
|
448
|
+
|
|
449
|
+
# Project back to unit sphere
|
|
450
|
+
with torch.no_grad():
|
|
451
|
+
directions.data = F.normalize(directions.data, p=2, dim=1)
|
|
452
|
+
|
|
453
|
+
# Ensure cone constraint: flip directions to same half-space as first
|
|
454
|
+
if directions.shape[0] > 1:
|
|
455
|
+
primary = directions[0:1]
|
|
456
|
+
for i in range(1, directions.shape[0]):
|
|
457
|
+
if (directions[i:i+1] @ primary.T).item() < 0:
|
|
458
|
+
directions.data[i] = -directions.data[i]
|
|
459
|
+
|
|
460
|
+
if step % 20 == 0:
|
|
461
|
+
training_losses.append(float(total_loss.item()))
|
|
462
|
+
|
|
463
|
+
final_directions = directions.detach()
|
|
464
|
+
|
|
465
|
+
metadata = {
|
|
466
|
+
"training_losses": training_losses,
|
|
467
|
+
"final_loss": float(total_loss.item()),
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
return final_directions, metadata
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _compute_cone_explained_variance(
|
|
474
|
+
pos_tensor: torch.Tensor,
|
|
475
|
+
neg_tensor: torch.Tensor,
|
|
476
|
+
cone_directions: torch.Tensor,
|
|
477
|
+
) -> float:
|
|
478
|
+
"""Compute how much variance the cone directions explain."""
|
|
479
|
+
all_activations = torch.cat([pos_tensor, neg_tensor], dim=0)
|
|
480
|
+
mean = all_activations.mean(dim=0, keepdim=True)
|
|
481
|
+
centered = all_activations - mean
|
|
482
|
+
|
|
483
|
+
total_var = (centered ** 2).sum()
|
|
484
|
+
if total_var == 0:
|
|
485
|
+
return 0.0
|
|
486
|
+
|
|
487
|
+
# Project onto cone directions
|
|
488
|
+
dirs_norm = F.normalize(cone_directions, p=2, dim=1)
|
|
489
|
+
projections = centered @ dirs_norm.T # [N, K]
|
|
490
|
+
reconstructed = projections @ dirs_norm # [N, hidden_dim]
|
|
491
|
+
|
|
492
|
+
explained_var = (reconstructed ** 2).sum() / total_var
|
|
493
|
+
return float(min(explained_var.item(), 1.0))
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def _check_half_space_consistency(directions: torch.Tensor) -> float:
|
|
497
|
+
"""Check what fraction of directions are in the same half-space as the primary."""
|
|
498
|
+
if directions.shape[0] <= 1:
|
|
499
|
+
return 1.0
|
|
500
|
+
|
|
501
|
+
dirs_norm = F.normalize(directions, p=2, dim=1)
|
|
502
|
+
primary = dirs_norm[0:1]
|
|
503
|
+
|
|
504
|
+
# Cosine similarity with primary
|
|
505
|
+
cos_with_primary = (dirs_norm @ primary.T).squeeze()
|
|
506
|
+
|
|
507
|
+
# Count positive correlations
|
|
508
|
+
positive_count = (cos_with_primary > 0).sum().item()
|
|
509
|
+
return positive_count / directions.shape[0]
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def _test_positive_combinations(
|
|
513
|
+
pos_tensor: torch.Tensor,
|
|
514
|
+
neg_tensor: torch.Tensor,
|
|
515
|
+
directions: torch.Tensor,
|
|
516
|
+
) -> float:
|
|
517
|
+
"""
|
|
518
|
+
Test if difference vectors can be represented as positive combinations
|
|
519
|
+
of cone directions (key property of polyhedral cones).
|
|
520
|
+
"""
|
|
521
|
+
diff = pos_tensor.mean(dim=0) - neg_tensor.mean(dim=0)
|
|
522
|
+
diff_norm = F.normalize(diff.unsqueeze(0), p=2, dim=1)
|
|
523
|
+
dirs_norm = F.normalize(directions, p=2, dim=1)
|
|
524
|
+
|
|
525
|
+
# Project difference onto each direction
|
|
526
|
+
projections = (diff_norm @ dirs_norm.T).squeeze()
|
|
527
|
+
|
|
528
|
+
# In a perfect cone, all projections should be non-negative
|
|
529
|
+
positive_projections = (projections >= 0).sum().item()
|
|
530
|
+
|
|
531
|
+
# Also check magnitude - projections should be substantial
|
|
532
|
+
significant_projections = (projections > 0.1).sum().item()
|
|
533
|
+
|
|
534
|
+
# Score combines both
|
|
535
|
+
pos_ratio = positive_projections / directions.shape[0]
|
|
536
|
+
sig_ratio = significant_projections / directions.shape[0]
|
|
537
|
+
|
|
538
|
+
return 0.7 * pos_ratio + 0.3 * sig_ratio
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def _compute_cosine_similarity_matrix(directions: torch.Tensor) -> torch.Tensor:
|
|
542
|
+
"""Compute pairwise cosine similarity matrix."""
|
|
543
|
+
dirs_norm = F.normalize(directions, p=2, dim=1)
|
|
544
|
+
return dirs_norm @ dirs_norm.T
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def _compute_avg_off_diagonal(matrix: torch.Tensor) -> float:
|
|
548
|
+
"""Compute average of off-diagonal elements."""
|
|
549
|
+
n = matrix.shape[0]
|
|
550
|
+
if n <= 1:
|
|
551
|
+
return 1.0
|
|
552
|
+
mask = 1 - torch.eye(n)
|
|
553
|
+
return float((matrix * mask).sum() / (n * (n - 1)))
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def _compute_separation_scores(
|
|
557
|
+
pos_tensor: torch.Tensor,
|
|
558
|
+
neg_tensor: torch.Tensor,
|
|
559
|
+
directions: torch.Tensor,
|
|
560
|
+
) -> List[float]:
|
|
561
|
+
"""Compute separation score for each direction."""
|
|
562
|
+
dirs_norm = F.normalize(directions, p=2, dim=1)
|
|
563
|
+
|
|
564
|
+
pos_proj = pos_tensor @ dirs_norm.T
|
|
565
|
+
neg_proj = neg_tensor @ dirs_norm.T
|
|
566
|
+
|
|
567
|
+
separation = pos_proj.mean(dim=0) - neg_proj.mean(dim=0)
|
|
568
|
+
return separation.tolist()
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
def _compute_cone_score(
|
|
572
|
+
pca_explained: float,
|
|
573
|
+
cone_explained: float,
|
|
574
|
+
half_space_score: float,
|
|
575
|
+
avg_cos_sim: float,
|
|
576
|
+
pos_combo_score: float,
|
|
577
|
+
separation_scores: List[float],
|
|
578
|
+
) -> float:
|
|
579
|
+
"""
|
|
580
|
+
Compute overall cone score combining all metrics.
|
|
581
|
+
|
|
582
|
+
High score indicates:
|
|
583
|
+
- Cone explains variance well (comparable to or better than PCA)
|
|
584
|
+
- Directions are in same half-space
|
|
585
|
+
- Directions are positively correlated but not redundant
|
|
586
|
+
- Difference can be expressed as positive combinations
|
|
587
|
+
- Multiple directions contribute to separation
|
|
588
|
+
"""
|
|
589
|
+
# 1. Variance explanation ratio (cone vs PCA)
|
|
590
|
+
var_ratio = cone_explained / max(pca_explained, 1e-6)
|
|
591
|
+
var_score = min(var_ratio, 1.0) # Cap at 1.0
|
|
592
|
+
|
|
593
|
+
# 2. Half-space consistency (critical for cone)
|
|
594
|
+
half_space_component = half_space_score
|
|
595
|
+
|
|
596
|
+
# 3. Cosine similarity should be moderate (0.3-0.7 is ideal for cone)
|
|
597
|
+
# Too low = not a cone, too high = redundant
|
|
598
|
+
if avg_cos_sim < 0:
|
|
599
|
+
cos_score = 0.0 # Negative correlation = not a cone
|
|
600
|
+
elif avg_cos_sim < 0.3:
|
|
601
|
+
cos_score = avg_cos_sim / 0.3 * 0.5 # Below ideal range
|
|
602
|
+
elif avg_cos_sim <= 0.7:
|
|
603
|
+
cos_score = 1.0 # Ideal range
|
|
604
|
+
else:
|
|
605
|
+
cos_score = max(0.5, 1.0 - (avg_cos_sim - 0.7) / 0.3) # Too similar
|
|
606
|
+
|
|
607
|
+
# 4. Positive combination score
|
|
608
|
+
combo_component = pos_combo_score
|
|
609
|
+
|
|
610
|
+
# 5. Multi-direction contribution
|
|
611
|
+
# Check if multiple directions have significant separation
|
|
612
|
+
significant_directions = sum(1 for s in separation_scores if abs(s) > 0.1)
|
|
613
|
+
multi_dir_score = min(significant_directions / max(len(separation_scores), 1), 1.0)
|
|
614
|
+
|
|
615
|
+
# Weighted combination
|
|
616
|
+
cone_score = (
|
|
617
|
+
0.20 * var_score +
|
|
618
|
+
0.25 * half_space_component +
|
|
619
|
+
0.20 * cos_score +
|
|
620
|
+
0.20 * combo_component +
|
|
621
|
+
0.15 * multi_dir_score
|
|
622
|
+
)
|
|
623
|
+
|
|
624
|
+
return float(cone_score)
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
# =============================================================================
|
|
628
|
+
# Comprehensive Geometry Structure Detection
|
|
629
|
+
# =============================================================================
|
|
630
|
+
|
|
631
|
+
from enum import Enum
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
class StructureType(Enum):
    """Geometric structures that the analysis can report for activations."""

    LINEAR = "linear"          # a single direction
    CONE = "cone"              # several correlated directions
    CLUSTER = "cluster"        # discrete groups
    MANIFOLD = "manifold"      # non-linear curved structure
    SPARSE = "sparse"          # few active dimensions
    BIMODAL = "bimodal"        # two distinct modes
    ORTHOGONAL = "orthogonal"  # independent subspaces
    UNKNOWN = "unknown"
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
@dataclass
class GeometryAnalysisConfig:
    """Settings for the comprehensive geometry analysis.

    The default thresholds are tuned with the Universal Subspace Hypothesis
    (Kaushik et al., 2025) in mind, which shows that neural networks
    converge to shared low-dimensional subspaces. Key implications:
    - Linear structure is more common than previously assumed
    - True cone/manifold structures are rarer
    - ~16 principal directions capture most variance
    """

    # --- General settings ---
    # Number of components/directions to analyze.
    num_components: int = 5
    # Steps for optimization-based methods.
    optimization_steps: int = 100

    # --- Linear detection ---
    # Explained-variance threshold for declaring linear structure.
    linear_variance_threshold: float = 0.85

    # --- Cone detection ---
    # Cone score threshold.
    cone_threshold: float = 0.65

    # --- Cluster detection ---
    # Maximum number of clusters to try.
    max_clusters: int = 5
    # Silhouette score threshold for cluster detection.
    cluster_silhouette_threshold: float = 0.55

    # --- Manifold detection ---
    # Number of neighbors for manifold analysis.
    manifold_neighbors: int = 10
    # Score threshold for manifold structure.
    manifold_threshold: float = 0.70

    # --- Sparse detection ---
    # Threshold on the fraction of active dimensions.
    sparse_threshold: float = 0.1

    # --- Bimodal detection ---
    # P-value threshold for the dip test.
    bimodal_dip_threshold: float = 0.05

    # --- Orthogonal detection ---
    # Maximum correlation for orthogonal subspaces.
    orthogonal_threshold: float = 0.12

    # --- Universal subspace integration ---
    # Whether to use thresholds tuned for universal subspace theory.
    use_universal_thresholds: bool = True
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
@dataclass
class StructureScore:
    """Score assigned to one structure type."""

    # The structure type this score belongs to.
    structure_type: StructureType
    # How well the structure fits.
    score: float
    # Confidence attached to the score.
    confidence: float
    # Free-form supporting details; a fresh dict per instance.
    details: Dict[str, Any] = field(default_factory=dict)
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
@dataclass
class GeometryAnalysisResult:
    """Outcome of the comprehensive geometry analysis."""

    # The structure type that best fits the data.
    best_structure: StructureType
    # Score of that best-fitting structure.
    best_score: float
    # Scores for all analyzed structure types.
    all_scores: Dict[str, StructureScore]
    # Recommended steering method based on the geometry.
    recommendation: str
    # Additional analysis details.
    details: Dict[str, Any] = field(default_factory=dict)

    def get_ranking(self) -> List[Tuple[StructureType, float]]:
        """Return ``(structure_type, score)`` pairs, highest score first."""
        ranking = [
            (entry.structure_type, entry.score)
            for entry in self.all_scores.values()
        ]
        ranking.sort(key=lambda pair: pair[1], reverse=True)
        return ranking
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def detect_geometry_structure(
    pos_activations: torch.Tensor,
    neg_activations: torch.Tensor,
    config: GeometryAnalysisConfig | None = None,
) -> GeometryAnalysisResult:
    """
    Detect the geometric structure of activation differences.

    Every detector is run on the same inputs and its raw score is kept as-is
    (no cross-structure penalisation). The reported best structure is then
    chosen by ``_find_most_specific_structure`` and the recommendation by
    ``_generate_recommendation``.

    Structure types scored:
    - Linear: Single direction explains the data (simplest)
    - Cone: Multiple correlated directions needed (more complex than linear)
    - Cluster: Discrete groups (different from continuous structures)
    - Orthogonal: Independent subspaces (different from cone)
    - Sparse: Few neurons encode the behavior
    - Bimodal: Two distinct modes
    - Manifold: Non-linear curved structure (most general, fallback)

    Arguments:
        pos_activations: Positive example activations [N_pos, hidden_dim].
            A 1-D tensor is treated as a single sample.
        neg_activations: Negative example activations [N_neg, hidden_dim].
            A 1-D tensor is treated as a single sample.
        config: Analysis configuration; a default ``GeometryAnalysisConfig``
            is created when omitted.

    Returns:
        GeometryAnalysisResult with scores for each structure type. Its
        ``details["config"]`` is a copy of the configuration's attributes, so
        editing the result never alters the caller's config object.
    """
    cfg = config or GeometryAnalysisConfig()

    pos_tensor = pos_activations.detach().float()
    neg_tensor = neg_activations.detach().float()

    if pos_tensor.dim() == 1:
        pos_tensor = pos_tensor.unsqueeze(0)
    if neg_tensor.dim() == 1:
        neg_tensor = neg_tensor.unsqueeze(0)

    # Pair samples index-by-index; the longer class is truncated so both sides
    # contribute the same number of rows.
    n_pairs = min(pos_tensor.shape[0], neg_tensor.shape[0])
    diff_vectors = pos_tensor[:n_pairs] - neg_tensor[:n_pairs]

    # Raw scores are used directly. Insertion order is kept stable because it
    # decides tie-breaking in rankings and in the highest-score fallback.
    all_scores: Dict[str, StructureScore] = {}
    all_scores["linear"] = _detect_linear_structure(pos_tensor, neg_tensor, diff_vectors, cfg)
    all_scores["cone"] = _detect_cone_structure_score(pos_tensor, neg_tensor, cfg)
    all_scores["cluster"] = _detect_cluster_structure(pos_tensor, neg_tensor, diff_vectors, cfg)
    all_scores["manifold"] = _detect_manifold_structure(pos_tensor, neg_tensor, diff_vectors, cfg)
    all_scores["sparse"] = _detect_sparse_structure(pos_tensor, neg_tensor, diff_vectors, cfg)
    all_scores["bimodal"] = _detect_bimodal_structure(pos_tensor, neg_tensor, diff_vectors, cfg)
    all_scores["orthogonal"] = _detect_orthogonal_structure(pos_tensor, neg_tensor, diff_vectors, cfg)

    # Most specific structure whose score clears its threshold
    # (linear > cone > orthogonal > cluster > sparse > bimodal > manifold).
    best_structure, best_score = _find_most_specific_structure(all_scores)

    recommendation = _generate_recommendation(best_structure, all_scores)

    return GeometryAnalysisResult(
        best_structure=best_structure,
        best_score=best_score,
        all_scores=all_scores,
        recommendation=recommendation,
        details={
            # Copy: handing out cfg.__dict__ itself would alias the live config.
            "config": dict(cfg.__dict__),
            "n_positive": pos_tensor.shape[0],
            "n_negative": neg_tensor.shape[0],
            "hidden_dim": pos_tensor.shape[1],
        },
    )
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def _find_most_specific_structure(scores: Dict[str, StructureScore]) -> Tuple[StructureType, float]:
    """
    Pick the most specific structure whose score reaches its threshold.

    Candidates are examined from most to least specific:
    linear, cone, orthogonal, cluster, sparse, bimodal, manifold.
    The first one present in ``scores`` with ``score >= threshold`` wins.
    If none qualifies, the entry with the highest score is returned instead
    (the first such entry in ``scores`` on ties).

    Returns:
        ``(structure_type, score)`` of the selected entry.
    """
    # (name, minimum score to count as "fits well enough"), most specific first.
    ordered_thresholds = (
        ("linear", 0.5),      # 1D separable
        ("cone", 0.5),        # correlated directions
        ("orthogonal", 0.5),  # independent directions
        ("cluster", 0.6),     # discrete groups
        ("sparse", 0.7),      # few active neurons
        ("bimodal", 0.5),     # two-mode distribution
        ("manifold", 0.3),    # fallback (always fits)
    )

    for name, minimum in ordered_thresholds:
        if name not in scores:
            continue
        candidate = scores[name]
        if candidate.score >= minimum:
            return candidate.structure_type, candidate.score

    # Nothing cleared its threshold: fall back to the highest raw score.
    top = max(scores.values(), key=lambda entry: entry.score)
    return top.structure_type, top.score
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
def _apply_hierarchical_scoring(raw_scores: Dict[str, StructureScore]) -> Dict[str, StructureScore]:
    """
    Apply hierarchical scoring to make structure types mutually exclusive.

    Hierarchy (simpler to more complex):
    1. Linear - if high, don't credit cone/manifold
    2. Cone - if high (and linear is low), don't credit manifold
    3. Cluster - independent axis (discrete vs continuous)
    4. Sparse - independent axis (encoding style)
    5. Bimodal - independent axis
    6. Orthogonal - alternative to cone (uncorrelated vs correlated directions)
    7. Manifold - fallback (only if nothing else fits)

    The adjusted score represents: "How well does THIS structure explain
    what simpler structures cannot?"

    Arguments:
        raw_scores: Unadjusted detector results keyed by structure name. A
            missing structure is treated as score 0, confidence 0 and empty
            details (previously its confidence lookup raised ``KeyError``).

    Returns:
        New dict with one adjusted ``StructureScore`` per structure. Each
        entry keeps the raw confidence and details and gains an
        ``"adjustment"`` note describing the change.
    """

    def _entry(name: str, structure_type: StructureType) -> StructureScore:
        # Resolve each input once so score, confidence and details all come
        # from the same object, whether or not the key is present.
        found = raw_scores.get(name)
        return found if found is not None else StructureScore(structure_type, 0.0, 0.0)

    def _adjusted(entry: StructureScore, structure_type: StructureType, score: float, note: str) -> StructureScore:
        # Copy the raw entry with a new score and an explanatory note.
        return StructureScore(
            structure_type,
            score=score,
            confidence=entry.confidence,
            details={**entry.details, "adjustment": note},
        )

    linear = _entry("linear", StructureType.LINEAR)
    cone = _entry("cone", StructureType.CONE)
    cluster = _entry("cluster", StructureType.CLUSTER)
    manifold = _entry("manifold", StructureType.MANIFOLD)
    sparse = _entry("sparse", StructureType.SPARSE)
    bimodal = _entry("bimodal", StructureType.BIMODAL)
    orthogonal = _entry("orthogonal", StructureType.ORTHOGONAL)

    linear_raw = linear.score
    cone_raw = cone.score

    # Thresholds for "structure is sufficient"
    LINEAR_THRESHOLD = 0.6  # If linear > 0.6, linear structure is sufficient
    CONE_THRESHOLD = 0.5    # If cone > 0.5 (after adjustment), cone is sufficient

    adjusted: Dict[str, StructureScore] = {}

    # 1. LINEAR: simplest structure, used as the baseline.
    adjusted["linear"] = _adjusted(linear, StructureType.LINEAR, linear_raw, "none (baseline)")

    # 2. CONE: discounted by up to 80% as linear approaches its threshold.
    linear_sufficiency = min(1.0, linear_raw / LINEAR_THRESHOLD) if linear_raw > 0 else 0
    adjusted["cone"] = _adjusted(
        cone,
        StructureType.CONE,
        cone_raw * (1 - linear_sufficiency * 0.8),
        f"reduced by linear_sufficiency={linear_sufficiency:.2f}",
    )

    # 3. MANIFOLD: fallback, discounted by up to 90% when linear or cone fits.
    cone_sufficiency = min(1.0, max(linear_raw, cone_raw) / CONE_THRESHOLD)
    adjusted["manifold"] = _adjusted(
        manifold,
        StructureType.MANIFOLD,
        manifold.score * (1 - cone_sufficiency * 0.9),
        f"reduced by cone_sufficiency={cone_sufficiency:.2f}",
    )

    # 4. CLUSTER: meaningful only if the data is discrete, so it is discounted
    # when a continuous structure (linear or cone) already scores well.
    continuous_score = max(linear_raw, cone_raw)
    adjusted["cluster"] = _adjusted(
        cluster,
        StructureType.CLUSTER,
        cluster.score * (1 - continuous_score * 0.5),
        f"reduced by continuous_score={continuous_score:.2f}",
    )

    # 5. SPARSE: about encoding style rather than geometry; only a mild
    # discount when linear is high (sparse + linear is still linear).
    adjusted["sparse"] = _adjusted(
        sparse,
        StructureType.SPARSE,
        sparse.score * (1 - linear_raw * 0.3),
        f"slight reduction for linear={linear_raw:.2f}",
    )

    # 6. BIMODAL: about distribution shape; left untouched.
    adjusted["bimodal"] = _adjusted(bimodal, StructureType.BIMODAL, bimodal.score, "none (independent axis)")

    # 7. ORTHOGONAL: alternative to cone, so it is discounted when cone is high.
    adjusted["orthogonal"] = _adjusted(
        orthogonal,
        StructureType.ORTHOGONAL,
        orthogonal.score * (1 - cone_raw * 0.7),
        f"reduced by cone={cone_raw:.2f}",
    )

    return adjusted
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
def _detect_linear_structure(
    pos_tensor: torch.Tensor,
    neg_tensor: torch.Tensor,
    diff_vectors: torch.Tensor,
    cfg: GeometryAnalysisConfig,
) -> StructureScore:
    """Score how well a single direction (difference of class means) captures the behavior.

    The score blends three terms: Cohen's d of the projections onto that
    direction (capped at 5), the share of variance removed by projecting the
    direction out, and how small the within-class spread is relative to the
    gap between class means. ``diff_vectors`` and ``cfg`` are accepted for
    signature parity with the other detectors but are not read here.

    Returns a zero score with a ``"reason"`` when either class has fewer than
    two samples or the class means coincide, and a zero score with an
    ``"error"`` message if the computation raises.
    """
    n_pos = pos_tensor.shape[0]
    n_neg = neg_tensor.shape[0]
    if n_pos < 2 or n_neg < 2:
        return StructureScore(StructureType.LINEAR, 0.0, 0.0, {"reason": "insufficient_data"})

    try:
        # Candidate direction: unit vector between the class means.
        class_gap = pos_tensor.mean(dim=0) - neg_tensor.mean(dim=0)
        gap_norm = class_gap.norm()
        if gap_norm < 1e-8:
            return StructureScore(StructureType.LINEAR, 0.0, 0.0, {"reason": "no_separation"})
        unit_dir = class_gap / gap_norm

        # Scalar coordinate of every sample along that direction.
        pos_coords = pos_tensor @ unit_dir
        neg_coords = neg_tensor @ unit_dir

        pos_mean, pos_std = pos_coords.mean(), pos_coords.std()
        neg_mean, neg_std = neg_coords.mean(), neg_coords.std()
        between_class_dist = abs(pos_mean - neg_mean)

        # Separation quality (Cohen's d with the pooled standard deviation).
        pooled_std = ((pos_std**2 + neg_std**2) / 2).sqrt()
        cohens_d = between_class_dist / (pooled_std + 1e-8)

        # What is left of each sample once its component along the direction
        # has been removed.
        pos_residual = pos_tensor - (pos_coords[:, None] * unit_dir[None, :])
        neg_residual = neg_tensor - (neg_coords[:, None] * unit_dir[None, :])

        total_var = pos_tensor.var() + neg_tensor.var()
        residual_var = pos_residual.var() + neg_residual.var()
        variance_explained = 1 - (residual_var / (total_var + 1e-8))
        variance_explained = max(0, min(1, float(variance_explained)))

        # High when the spread along the direction is small relative to the gap.
        within_class_spread = (pos_std + neg_std) / 2
        spread_ratio = within_class_spread / (between_class_dist + 1e-8)
        consistency = max(0, 1 - spread_ratio)

        linear_score = (
            0.35 * min(float(cohens_d) / 5, 1.0) +  # separation quality
            0.35 * variance_explained +             # variance captured by one direction
            0.30 * consistency                      # low within-class spread
        )

        # Confidence saturates at 50 samples in total.
        confidence = min(1.0, (n_pos + n_neg) / 50)

        return StructureScore(
            StructureType.LINEAR,
            score=float(linear_score),
            confidence=float(confidence),
            details={
                "cohens_d": float(cohens_d),
                "variance_explained": float(variance_explained),
                "within_class_consistency": float(consistency),
                "pos_std": float(pos_std),
                "neg_std": float(neg_std),
                "separation": float(between_class_dist),
            },
        )
    except Exception as e:
        return StructureScore(StructureType.LINEAR, 0.0, 0.0, {"error": str(e)})
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
def _detect_cone_structure_score(
    pos_tensor: torch.Tensor,
    neg_tensor: torch.Tensor,
    cfg: GeometryAnalysisConfig,
) -> StructureScore:
    """Run the cone analysis and condense its result into a ``StructureScore``.

    The score is a weighted sum of half-space consistency, a tiered cosine
    similarity term, the cone's explained-variance gain over PCA, the share of
    directions with a non-negligible separation score, and a term that drops
    as PCA alone explains more variance. Any exception yields a zero score
    carrying the error message.
    """
    cone_config = ConeAnalysisConfig(
        num_directions=cfg.num_components,
        optimization_steps=cfg.optimization_steps,
        cone_threshold=cfg.cone_threshold,
    )

    try:
        analysis = check_cone_structure(pos_tensor, neg_tensor, cone_config)

        # High PCA explained variance means a linear description is enough.
        pca_penalty = analysis.pca_explained_variance

        # Only a cone that explains more than PCA earns this reward.
        cone_advantage = max(0, analysis.cone_explained_variance - analysis.pca_explained_variance)

        # A cone wants moderate similarity between directions: too high means
        # they are effectively one direction (linear), too low means they are
        # not a cone. Tiers are checked from the highest bound down.
        cos_sim = analysis.avg_cosine_similarity
        for lower_bound, tier_score in ((0.85, 0.3), (0.7, 0.7), (0.3, 1.0)):
            if cos_sim > lower_bound:
                cosine_score = tier_score
                break
        else:
            # At or below 0.3: scale linearly towards zero.
            cosine_score = max(0, cos_sim / 0.3)

        # Share of the requested directions that actually separate the classes.
        significant_dirs = len([s for s in analysis.separation_scores if abs(s) > 0.1])
        multi_dir_score = min(significant_dirs / cfg.num_components, 1.0)

        cone_score = (
            0.25 * analysis.half_space_consistency +
            0.25 * cosine_score +
            0.20 * cone_advantage +
            0.15 * multi_dir_score +
            0.15 * (1 - pca_penalty)  # lower when PCA is already sufficient
        )

        return StructureScore(
            StructureType.CONE,
            score=float(cone_score),
            confidence=analysis.half_space_consistency,
            details={
                "pca_explained": analysis.pca_explained_variance,
                "cone_explained": analysis.cone_explained_variance,
                "cone_advantage": float(cone_advantage),
                "avg_cosine_similarity": analysis.avg_cosine_similarity,
                "half_space_consistency": analysis.half_space_consistency,
                "num_directions": analysis.num_directions_found,
                "significant_directions": significant_dirs,
            },
        )
    except Exception as e:
        return StructureScore(StructureType.CONE, 0.0, 0.0, {"error": str(e)})
|
|
1125
|
+
|
|
1126
|
+
|
|
1127
|
+
def _detect_cluster_structure(
    pos_tensor: torch.Tensor,
    neg_tensor: torch.Tensor,
    diff_vectors: torch.Tensor,
    cfg: GeometryAnalysisConfig,
) -> StructureScore:
    """Detect if activations form discrete clusters.

    k-means is run on the concatenated positive and negative samples for
    ``k = 2 .. min(cfg.max_clusters, n_samples // 2 - 1)`` and the ``k`` with
    the highest silhouette is kept. The labels of that same run are used to
    check whether the two classes fall into different majority clusters, so
    the purity check and the reported silhouette describe one clustering.

    ``diff_vectors`` is accepted for signature parity with the other
    detectors but is not read here.

    Returns a zero score with a ``"reason"`` when fewer than 6 samples are
    available or when no run produced a non-negative silhouette.
    """
    all_activations = torch.cat([pos_tensor, neg_tensor], dim=0)
    n_samples = all_activations.shape[0]

    if n_samples < 6:
        return StructureScore(StructureType.CLUSTER, 0.0, 0.0, {"reason": "insufficient_data"})

    best_silhouette = -1.0
    best_k = 2
    best_labels = None
    silhouette_scores = {}

    for k in range(2, min(cfg.max_clusters + 1, n_samples // 2)):
        try:
            labels, _, silhouette = _kmeans_with_silhouette(all_activations, k, max_iters=50)
        except Exception:
            # Best effort: a failing k is skipped, the remaining ones still count.
            continue

        silhouette_scores[k] = silhouette
        if silhouette > best_silhouette:
            best_silhouette = silhouette
            best_k = k
            best_labels = labels

    if best_silhouette < 0 or best_labels is None:
        return StructureScore(StructureType.CLUSTER, 0.0, 0.0, {"reason": "clustering_failed"})

    # Cluster purity: do pos and neg end up in different majority clusters?
    # Rows are ordered positives first, then negatives.
    n_pos = pos_tensor.shape[0]
    pos_labels = best_labels[:n_pos]
    neg_labels = best_labels[n_pos:]
    pos_majority = pos_labels.mode().values.item() if len(pos_labels) > 0 else -1
    neg_majority = neg_labels.mode().values.item() if len(neg_labels) > 0 else -1
    cluster_separation = 1.0 if pos_majority != neg_majority else 0.5

    # Silhouette score ranges from -1 to 1, where:
    # > 0.7 = strong structure
    # 0.5-0.7 = reasonable structure
    # 0.25-0.5 = weak structure
    # < 0.25 = no substantial structure
    threshold = cfg.cluster_silhouette_threshold
    if best_silhouette < threshold:
        # Below the threshold there is no clear cluster structure: scale down.
        cluster_score = best_silhouette * 0.5
    else:
        # Map the silhouette from [threshold, 1] onto [0, 1]; the score then
        # starts at 0.5 and gains up to 0.4 from it plus 0.1 * separation.
        normalized_silhouette = (best_silhouette - threshold) / (1 - threshold)
        cluster_score = 0.5 + 0.4 * normalized_silhouette + 0.1 * cluster_separation

    return StructureScore(
        StructureType.CLUSTER,
        score=float(cluster_score),
        confidence=float(max(0, best_silhouette)),
        details={
            "best_k": best_k,
            "best_silhouette": float(best_silhouette),
            "all_silhouettes": {str(k): float(v) for k, v in silhouette_scores.items()},
            "cluster_separation": float(cluster_separation),
            "silhouette_threshold": cfg.cluster_silhouette_threshold,
        },
    )
|
|
1197
|
+
|
|
1198
|
+
|
|
1199
|
+
def _kmeans_with_silhouette(
    data: torch.Tensor,
    k: int,
    max_iters: int = 50,
) -> Tuple[torch.Tensor, torch.Tensor, float]:
    """Simple k-means (Lloyd iterations) with silhouette score computation.

    Centroids start from ``k`` randomly chosen rows of ``data``
    (``torch.randperm``), so results depend on the global torch RNG state.

    Arguments:
        data: Samples to cluster, one row per sample.
        k: Number of clusters.
        max_iters: Maximum number of centroid updates. With ``0`` the initial
            assignment is returned.

    Returns:
        ``(labels, centroids, silhouette)``. ``labels`` is always the
        nearest-centroid assignment for the returned ``centroids``, including
        when the iteration limit is hit before convergence.
    """
    n_samples = data.shape[0]

    # Initialize centroids randomly
    centroids = data[torch.randperm(n_samples)[:k]].clone()
    labels = torch.cdist(data, centroids).argmin(dim=1)

    for _ in range(max_iters):
        # Update centroids; a cluster that lost all members keeps its position.
        new_centroids = centroids.clone()
        for i in range(k):
            mask = labels == i
            if mask.any():
                new_centroids[i] = data[mask].mean(dim=0)

        converged = torch.allclose(centroids, new_centroids, atol=1e-6)
        centroids = new_centroids
        # Re-assign after every update so labels never lag behind centroids.
        labels = torch.cdist(data, centroids).argmin(dim=1)
        if converged:
            break

    silhouette = _compute_silhouette(data, labels, k)

    return labels, centroids, silhouette
|
|
1233
|
+
|
|
1234
|
+
|
|
1235
|
+
def _compute_silhouette(data: torch.Tensor, labels: torch.Tensor, k: int) -> float:
    """Compute the mean silhouette score of a clustering.

    For every sample, ``a`` is the mean Euclidean distance to the other
    members of its own cluster and ``b`` is the smallest mean distance to the
    members of any other non-empty cluster among labels ``0 .. k-1``. The
    sample's silhouette is ``(b - a) / max(a, b)``.

    A sample that is alone in its cluster contributes 0, following the
    standard silhouette definition. Scoring it from ``a = 0`` would award a
    perfect 1.0 to every isolated point.

    Arguments:
        data: Samples, one row per sample.
        labels: Cluster index of each row.
        k: Number of clusters.

    Returns:
        Mean silhouette over all samples; 0.0 when there are fewer than two
        samples or fewer than two clusters.
    """
    n_samples = data.shape[0]
    if n_samples < 2 or k < 2:
        return 0.0

    # Member rows per cluster, selected once instead of once per sample.
    cluster_members = {cluster: data[labels == cluster] for cluster in range(k)}

    total = 0.0
    for i in range(n_samples):
        own_label = labels[i].item()
        if own_label in cluster_members:
            own_members = cluster_members[own_label]
        else:
            # Label outside 0..k-1: still compare against its own group.
            own_members = data[labels == own_label]

        own_size = own_members.shape[0]
        if own_size <= 1:
            # Singleton cluster: silhouette is defined as 0.
            continue

        # a(i): the sample itself contributes distance 0 to the sum, hence
        # the division by (size - 1).
        a_i = float((data[i] - own_members).norm(dim=1).sum() / (own_size - 1))

        # b(i): nearest other cluster by mean distance.
        other_means = [
            (data[i] - members).norm(dim=1).mean().item()
            for cluster, members in cluster_members.items()
            if cluster != own_label and members.shape[0] > 0
        ]
        b_i = min(other_means) if other_means else 0.0

        denominator = max(a_i, b_i)
        if denominator > 0:
            total += (b_i - a_i) / denominator

    return float(total / n_samples)
|
|
1274
|
+
|
|
1275
|
+
|
|
1276
|
+
def _detect_manifold_structure(
    pos_tensor: torch.Tensor,
    neg_tensor: torch.Tensor,
    diff_vectors: torch.Tensor,
    cfg: GeometryAnalysisConfig,
) -> StructureScore:
    """Score non-linear manifold structure from intrinsic dimensionality and local curvature.

    The classes must first show some separation (distance between class means
    relative to the summed standard deviations); otherwise a fixed low score
    of 0.1 is returned. When the estimated intrinsic dimension exceeds half
    the ambient dimension only the separation term counts, scaled by 0.3.
    Otherwise the score combines dimension reduction, local nonlinearity and
    separation. ``diff_vectors`` is accepted for signature parity with the
    other detectors but is not read here.

    Returns a zero score with a ``"reason"`` when there are not more samples
    than ``cfg.manifold_neighbors``, and a zero score with an ``"error"``
    message if the computation raises.
    """
    stacked = torch.cat([pos_tensor, neg_tensor], dim=0)
    n_samples = stacked.shape[0]

    if n_samples < cfg.manifold_neighbors + 1:
        return StructureScore(StructureType.MANIFOLD, 0.0, 0.0, {"reason": "insufficient_data"})

    try:
        # Separation strength, halved and capped at 1.
        class_gap = pos_tensor.mean(dim=0) - neg_tensor.mean(dim=0)
        separation_strength = class_gap.norm() / (pos_tensor.std() + neg_tensor.std() + 1e-8)
        has_structure = min(float(separation_strength) / 2, 1.0)

        if has_structure < 0.2:
            # Without meaningful separation a manifold cannot be assessed.
            return StructureScore(StructureType.MANIFOLD, 0.1, 0.0, {"reason": "no_separation"})

        intrinsic_dim = _estimate_intrinsic_dimensionality(stacked, cfg.manifold_neighbors)
        ambient_dim = stacked.shape[1]
        dim_ratio = intrinsic_dim / ambient_dim

        local_nonlinearity = _compute_local_nonlinearity(stacked, cfg.manifold_neighbors)

        if dim_ratio <= 0.5:
            # Low intrinsic dimension alone could still be linear, and noise
            # also looks "non-linear", so separation carries the most weight.
            manifold_score = (
                0.30 * (1 - dim_ratio) +
                0.25 * local_nonlinearity +
                0.45 * has_structure
            )
        else:
            # Little dimension reduction: not a clear manifold.
            manifold_score = 0.3 * has_structure

        # Confidence saturates at 100 samples.
        confidence = min(1.0, n_samples / 100)

        return StructureScore(
            StructureType.MANIFOLD,
            score=float(manifold_score),
            confidence=float(confidence),
            details={
                "intrinsic_dimensionality": float(intrinsic_dim),
                "ambient_dimensionality": ambient_dim,
                "dim_ratio": float(dim_ratio),
                "local_nonlinearity": float(local_nonlinearity),
            },
        )
    except Exception as e:
        return StructureScore(StructureType.MANIFOLD, 0.0, 0.0, {"error": str(e)})
|
|
1340
|
+
|
|
1341
|
+
|
|
1342
|
+
def _estimate_intrinsic_dimensionality(data: torch.Tensor, k: int) -> float:
    """Estimate intrinsic dimensionality using the nearest-neighbour MLE method.

    For each sample the distances ``r_1 <= ... <= r_k`` to its ``k`` nearest
    neighbours give the local estimate ``1 / mean_j(log(r_k / r_j))`` for
    ``j < k``, capped at the ambient dimension. The result is the mean of the
    local estimates.

    Arguments:
        data: Samples, one row per sample.
        k: Number of neighbours. Values larger than ``n_samples - 1`` are
            clamped, so the infinite self-distance is never picked as ``r_k``
            (which would silently turn every local estimate into 0).

    Returns:
        Mean local estimate, or the ambient dimension when no sample yields a
        usable estimate (fewer than two distinct neighbours everywhere).
    """
    n_samples, ambient_dim = data.shape

    # A sample has at most n_samples - 1 real neighbours; the estimator needs two.
    k = min(k, n_samples - 1)
    if k < 2:
        return float(ambient_dim)

    # Pairwise distances with self-matches excluded.
    distances = torch.cdist(data, data)
    distances.fill_diagonal_(float('inf'))

    # k smallest distances per row, in ascending order.
    knn_all, _ = torch.topk(distances, k, dim=1, largest=False)

    intrinsic_dims = []
    for knn_dists in knn_all:
        knn_dists = knn_dists[knn_dists > 1e-10]  # drop duplicates at distance ~0
        if len(knn_dists) < 2:
            continue

        mean_log_ratio = torch.log(knn_dists[-1] / knn_dists[:-1]).mean()
        if mean_log_ratio > 0:
            intrinsic_dims.append(min(float(1.0 / mean_log_ratio), ambient_dim))  # cap at ambient dim

    if not intrinsic_dims:
        return float(ambient_dim)

    return float(sum(intrinsic_dims) / len(intrinsic_dims))
|
|
1376
|
+
|
|
1377
|
+
|
|
1378
|
+
def _compute_local_nonlinearity(data: torch.Tensor, k: int) -> float:
    """Measure how evenly variance spreads across directions in local neighbourhoods.

    For each of the first ``min(n_samples, 50)`` rows, the ``k`` nearest other
    rows are centred and decomposed with SVD. The normalised entropy of the
    squared singular values is the neighbourhood's score: 0 when a single
    direction carries all variance, 1 when the retained directions carry
    equal shares. Neighbourhoods with zero total variance, or whose
    decomposition raises, are skipped.

    Returns:
        Mean score over the evaluated neighbourhoods, or 0.0 if none could be
        evaluated.
    """
    pairwise = torch.cdist(data, data)
    n_probes = min(data.shape[0], 50)  # cap the work at the first 50 rows

    collected = []
    for idx in range(n_probes):
        # Nearest neighbours of row idx, with the row itself pushed out of reach.
        row = pairwise[idx].clone()
        row[idx] = float('inf')
        neighbor_idx = torch.topk(row, k, largest=False).indices

        patch = data[neighbor_idx]
        patch_centered = patch - patch.mean(dim=0, keepdim=True)

        try:
            singular_values = torch.linalg.svd(patch_centered, full_matrices=False)[1]

            energy = singular_values ** 2
            total_energy = energy.sum()
            if total_energy > 0:
                # Entropy of the variance shares, relative to the maximum
                # possible for the number of retained components.
                shares = energy / total_energy
                shares = shares[shares > 1e-10]
                entropy = -(shares * torch.log(shares + 1e-10)).sum()
                max_entropy = torch.log(torch.tensor(float(len(shares))))
                if max_entropy > 0:
                    collected.append(float(entropy / max_entropy))
                else:
                    collected.append(0.0)
        except Exception:
            continue

    if not collected:
        return 0.0
    return float(sum(collected) / len(collected))
|
|
1415
|
+
|
|
1416
|
+
|
|
1417
|
+
def _detect_sparse_structure(
    pos_tensor: torch.Tensor,
    neg_tensor: torch.Tensor,
    diff_vectors: torch.Tensor,
    cfg: GeometryAnalysisConfig,
) -> StructureScore:
    """Detect if behavior is encoded in sparse neuron activations.

    Sparsity is measured on the absolute difference between the per-dimension
    means of ``pos_tensor`` and ``neg_tensor`` using three indicators: a
    normalised L1/L2 ratio, the fraction of dimensions above
    ``cfg.sparse_threshold`` times the largest entry, and the Gini
    coefficient.  They are blended with weights 0.4 / 0.3 / 0.3 and clamped
    to ``[0, 1]``.

    Args:
        pos_tensor: Positive-class activations, one sample per row.
        neg_tensor: Negative-class activations, one sample per row.
        diff_vectors: Not used by this detector; accepted so that it has the
            same parameter list as the other ``_detect_*`` functions here.
        cfg: Analysis configuration; only ``sparse_threshold`` is read.

    Returns:
        A ``StructureScore`` for ``StructureType.SPARSE`` whose confidence
        grows with the total sample count and saturates at 30 samples.
    """
    # Mean difference vector
    mean_diff = pos_tensor.mean(dim=0) - neg_tensor.mean(dim=0)
    abs_diff = mean_diff.abs()

    # L1/L2 ratio scaled by sqrt(dim) (lower = sparser)
    l1_norm = abs_diff.sum()
    l2_norm = abs_diff.norm()
    if l2_norm > 0:
        l1_l2_ratio = l1_norm / (l2_norm * (len(mean_diff) ** 0.5))
    else:
        l1_l2_ratio = 1.0

    # Fraction of "active" dimensions (above a threshold relative to the max)
    threshold = abs_diff.max() * cfg.sparse_threshold
    active_fraction = (abs_diff > threshold).float().mean()

    # Gini coefficient (measures inequality).  The rank vector must live on
    # the same device and share a dtype with the data, otherwise the dot
    # product fails for CUDA or non-float32 activations.
    gini_dtype = torch.promote_types(torch.float32, abs_diff.dtype)
    sorted_abs = abs_diff.to(gini_dtype).sort().values
    n = len(sorted_abs)
    ranks = torch.arange(1, n + 1, dtype=gini_dtype, device=sorted_abs.device)
    total = sorted_abs.sum()
    gini = ((2 * ranks) @ sorted_abs - (n + 1) * total) / (n * total + 1e-10)

    # Sparse score: high if few dimensions are active
    sparse_score = 0.4 * (1 - float(l1_l2_ratio)) + 0.3 * (1 - float(active_fraction)) + 0.3 * float(gini)
    sparse_score = max(0, min(1, sparse_score))

    # Top contributing dimensions and their share of the total L1 mass
    top_k = min(10, len(mean_diff))
    top_values, top_indices = torch.topk(abs_diff, top_k)
    top_contribution = top_values.sum() / (abs_diff.sum() + 1e-10)

    return StructureScore(
        StructureType.SPARSE,
        score=float(sparse_score),
        confidence=min(1.0, (pos_tensor.shape[0] + neg_tensor.shape[0]) / 30),
        details={
            "l1_l2_ratio": float(l1_l2_ratio),
            "active_fraction": float(active_fraction),
            "gini_coefficient": float(gini),
            "top_10_contribution": float(top_contribution),
            "top_indices": top_indices.tolist(),
        }
    )
|
|
1470
|
+
|
|
1471
|
+
|
|
1472
|
+
def _detect_bimodal_structure(
    pos_tensor: torch.Tensor,
    neg_tensor: torch.Tensor,
    diff_vectors: torch.Tensor,
    cfg: GeometryAnalysisConfig,
) -> StructureScore:
    """Score how bimodal the activations look along the mean-difference axis.

    All samples are projected onto the unit vector that points from the
    negative-class mean to the positive-class mean.  The score averages two
    capped terms: ten times the value returned by ``_compute_dip_statistic``
    for the pooled projections, and one third of the gap between the two
    projected class means measured in units of their average standard
    deviation.

    ``diff_vectors`` and ``cfg`` are accepted but not read here.
    """
    # Unit vector from the negative-class mean towards the positive-class mean.
    class_gap = pos_tensor.mean(dim=0) - neg_tensor.mean(dim=0)
    direction = F.normalize(class_gap, p=2, dim=0)

    # Pooled 1-D coordinates along that axis, moved to CPU for the dip helper.
    pooled = torch.cat([pos_tensor, neg_tensor], dim=0)
    projections = (pooled @ direction).cpu()
    dip_statistic = _compute_dip_statistic(projections)

    # Per-class coordinates and their first two moments.
    pos_coords = pos_tensor @ direction
    neg_coords = neg_tensor @ direction
    pos_mean, pos_std = pos_coords.mean(), pos_coords.std()
    neg_mean, neg_std = neg_coords.mean(), neg_coords.std()

    # Gap between class means in units of the average class spread; left at
    # zero unless both spreads are strictly positive.
    mean_diff_normalized = 0.0
    if pos_std > 0 and neg_std > 0:
        mean_diff_normalized = abs(pos_mean - neg_mean) / ((pos_std + neg_std) / 2)

    # Both ingredients are capped at 1 and weighted equally.
    dip_term = min(float(dip_statistic) * 10, 1.0)
    separation_term = min(float(mean_diff_normalized) / 3, 1.0)
    bimodal_score = 0.5 * dip_term + 0.5 * separation_term

    details = {
        "dip_statistic": float(dip_statistic),
        "mean_separation": float(mean_diff_normalized),
        "pos_mean": float(pos_mean),
        "neg_mean": float(neg_mean),
        "pos_std": float(pos_std),
        "neg_std": float(neg_std),
    }
    return StructureScore(
        StructureType.BIMODAL,
        score=float(bimodal_score),
        confidence=min(1.0, len(projections) / 50),
        details=details,
    )
|
|
1519
|
+
|
|
1520
|
+
|
|
1521
|
+
def _compute_dip_statistic(data: torch.Tensor) -> float:
|
|
1522
|
+
"""Compute Hartigan's dip statistic (simplified)."""
|
|
1523
|
+
sorted_data = data.sort().values
|
|
1524
|
+
n = len(sorted_data)
|
|
1525
|
+
|
|
1526
|
+
if n < 4:
|
|
1527
|
+
return 0.0
|
|
1528
|
+
|
|
1529
|
+
# Empirical CDF
|
|
1530
|
+
ecdf = torch.arange(1, n + 1, dtype=torch.float32) / n
|
|
1531
|
+
|
|
1532
|
+
# Greatest convex minorant and least concave majorant
|
|
1533
|
+
# Simplified: measure deviation from uniform
|
|
1534
|
+
uniform = torch.linspace(0, 1, n)
|
|
1535
|
+
|
|
1536
|
+
# Kolmogorov-Smirnov like statistic
|
|
1537
|
+
ks_stat = (ecdf - uniform).abs().max()
|
|
1538
|
+
|
|
1539
|
+
return float(ks_stat)
|
|
1540
|
+
|
|
1541
|
+
|
|
1542
|
+
def _detect_orthogonal_structure(
    pos_tensor: torch.Tensor,
    neg_tensor: torch.Tensor,
    diff_vectors: torch.Tensor,
    cfg: GeometryAnalysisConfig,
) -> StructureScore:
    """Detect if behavior is encoded in multiple orthogonal/independent subspaces.

    Orthogonal structure means the data requires MULTIPLE independent directions
    that are NOT correlated with each other. This is different from cone (where
    directions are correlated) and linear (where one direction suffices).

    Args:
        pos_tensor: Positive-class activations, one sample per row.
        neg_tensor: Negative-class activations, one sample per row.
        diff_vectors: Difference vectors, one per row, analysed with PCA.
        cfg: Analysis configuration; only ``num_components`` is read, as the
            minimum number of difference vectors required.

    Returns:
        A ``StructureScore`` for ``StructureType.ORTHOGONAL``.  Score and
        confidence are both 0.0, with a ``"reason"`` or ``"error"`` entry in
        the details, when there are too few vectors, no variance, or any
        exception is raised during the analysis.
    """
    if diff_vectors.shape[0] < cfg.num_components:
        return StructureScore(StructureType.ORTHOGONAL, 0.0, 0.0, {"reason": "insufficient_data"})

    try:
        # PCA (via SVD of the centred data) to understand variance distribution
        centered = diff_vectors - diff_vectors.mean(dim=0, keepdim=True)
        _, S, _ = torch.linalg.svd(centered, full_matrices=False)

        total_var = (S ** 2).sum()
        if total_var < 1e-8:
            return StructureScore(StructureType.ORTHOGONAL, 0.0, 0.0, {"reason": "no_variance"})

        var_explained = (S ** 2) / total_var

        # First component dominance (low = more orthogonal/spread)
        first_var = float(var_explained[0])

        # Normalised spectral entropy; reported in the details only
        var_explained_clipped = var_explained[var_explained > 1e-10]
        entropy = -(var_explained_clipped * torch.log(var_explained_clipped + 1e-10)).sum()
        max_entropy = torch.log(torch.tensor(float(len(var_explained_clipped))))
        effective_dim_ratio = float(entropy / max_entropy) if max_entropy > 0 else 0.0

        # Count significant dimensions (>5% variance each)
        significant_dims = (var_explained > 0.05).sum().item()

        # Separation strength: distance between class means relative to the
        # summed overall standard deviations, capped at 1 after dividing by 3
        mean_diff = pos_tensor.mean(dim=0) - neg_tensor.mean(dim=0)
        separation_strength = mean_diff.norm() / (pos_tensor.std() + neg_tensor.std() + 1e-8)
        has_separation = min(float(separation_strength) / 3, 1.0)

        if significant_dims < 2:
            # Too few dimensions = linear
            orthogonal_score = 0.2
        elif significant_dims > 10:
            # Too many = likely noise, not structure
            orthogonal_score = 0.3 * has_separation
        else:
            # 2 to 10 significant dimensions: reward a spectrum that is not
            # dominated by its first component, several significant
            # directions (this term saturates at 4) and clear separation
            structure_score = (
                0.3 * (1 - first_var) +  # Not dominated by one direction
                0.3 * min(significant_dims / 4, 1.0) +  # Saturates at 4 directions
                0.4 * has_separation  # Must have separation
            )
            orthogonal_score = structure_score * 0.8  # Scale down - orthogonal is rare

        return StructureScore(
            StructureType.ORTHOGONAL,
            score=float(orthogonal_score),
            confidence=min(1.0, diff_vectors.shape[0] / 30),
            details={
                "first_component_variance": float(first_var),
                "effective_dim_ratio": float(effective_dim_ratio),
                "significant_dimensions": int(significant_dims),
                "top_5_variances": var_explained[:min(5, len(var_explained))].tolist(),
            }
        )
    except Exception as e:
        # Best effort: report the failure instead of aborting the analysis
        return StructureScore(StructureType.ORTHOGONAL, 0.0, 0.0, {"error": str(e)})
|
|
1631
|
+
|
|
1632
|
+
|
|
1633
|
+
def _generate_recommendation(best_structure: StructureType, all_scores: Dict[str, StructureScore]) -> str:
    """Return the steering-method advice for the detected geometry.

    The advice text is looked up by ``best_structure``; structures without an
    entry get the ``StructureType.UNKNOWN`` text.  When ``all_scores`` holds
    at least two entries and the second-highest score exceeds 0.6, a hint
    naming that entry and its score is appended.
    """
    advice_by_structure = {
        StructureType.LINEAR: "Use CAA (Contrastive Activation Addition) - single direction steering is optimal.",
        StructureType.CONE: "Use PRISM - multi-directional steering will capture the full behavior cone.",
        StructureType.CLUSTER: "Consider cluster-based steering or multiple separate vectors for each cluster.",
        StructureType.MANIFOLD: "Use TITAN with learned gating - non-linear structure requires adaptive steering.",
        StructureType.SPARSE: "Use SAE-based steering targeting the specific active neurons.",
        StructureType.BIMODAL: "Use PULSE with conditional gating - behavior has two distinct modes.",
        StructureType.ORTHOGONAL: "Use multiple independent CAA vectors or ICA-based steering.",
        StructureType.UNKNOWN: "Structure unclear - start with CAA and evaluate effectiveness.",
    }
    fallback = advice_by_structure[StructureType.UNKNOWN]
    recommendation = advice_by_structure.get(best_structure, fallback)

    # Rank every entry by score, highest first.
    ranked = sorted(all_scores.items(), key=lambda item: item[1].score, reverse=True)
    if len(ranked) < 2:
        return recommendation

    # Mention the second-ranked entry only when it is itself a strong match.
    runner_up_name, runner_up = ranked[1]
    if runner_up.score > 0.6:
        recommendation = recommendation + f" (Also consider {runner_up_name}: score {runner_up.score:.2f})"

    return recommendation
|