wisent 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (391)
  1. wisent/__init__.py +1 -1
  2. wisent/comparison/__init__.py +1 -0
  3. wisent/comparison/detect_bos_features.py +275 -0
  4. wisent/comparison/fgaa.py +465 -0
  5. wisent/comparison/lora.py +669 -0
  6. wisent/comparison/lora_dpo.py +592 -0
  7. wisent/comparison/main.py +444 -0
  8. wisent/comparison/ours.py +76 -0
  9. wisent/comparison/sae.py +304 -0
  10. wisent/comparison/utils.py +381 -0
  11. wisent/core/activations/activation_cache.py +393 -0
  12. wisent/core/activations/activations.py +3 -3
  13. wisent/core/activations/activations_collector.py +12 -7
  14. wisent/core/activations/classifier_inference_strategy.py +12 -11
  15. wisent/core/activations/extraction_strategy.py +260 -84
  16. wisent/core/classifiers/classifiers/core/atoms.py +3 -2
  17. wisent/core/cli/__init__.py +2 -1
  18. wisent/core/cli/agent/train_classifier.py +16 -3
  19. wisent/core/cli/check_linearity.py +35 -3
  20. wisent/core/cli/cluster_benchmarks.py +4 -6
  21. wisent/core/cli/create_steering_vector.py +6 -4
  22. wisent/core/cli/diagnose_vectors.py +7 -4
  23. wisent/core/cli/estimate_unified_goodness_time.py +6 -4
  24. wisent/core/cli/generate_pairs_from_task.py +9 -56
  25. wisent/core/cli/generate_vector_from_task.py +11 -20
  26. wisent/core/cli/geometry_search.py +137 -0
  27. wisent/core/cli/get_activations.py +2 -2
  28. wisent/core/cli/method_optimizer.py +4 -3
  29. wisent/core/cli/modify_weights.py +3 -2
  30. wisent/core/cli/optimize_sample_size.py +1 -1
  31. wisent/core/cli/optimize_steering.py +14 -16
  32. wisent/core/cli/optimize_weights.py +2 -1
  33. wisent/core/cli/preview_pairs.py +203 -0
  34. wisent/core/cli/steering_method_trainer.py +3 -3
  35. wisent/core/cli/tasks.py +19 -76
  36. wisent/core/cli/train_unified_goodness.py +3 -3
  37. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +4 -4
  38. wisent/core/contrastive_pairs/diagnostics/linearity.py +7 -0
  39. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentic_search.py +37 -347
  40. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aider_polyglot.py +113 -136
  41. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codeforces.py +2 -12
  42. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/coding_benchmarks.py +124 -504
  43. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/faithbench.py +40 -63
  44. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flames.py +46 -89
  45. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flores.py +15 -4
  46. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/frames.py +36 -20
  47. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hallucinations_leaderboard.py +3 -45
  48. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livemathbench.py +42 -4
  49. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/longform_writing.py +2 -112
  50. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/math500.py +39 -4
  51. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/medium_priority_benchmarks.py +475 -525
  52. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mercury.py +65 -42
  53. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/olympiadbench.py +2 -12
  54. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/planbench.py +78 -219
  55. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/polymath.py +37 -4
  56. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/recode.py +84 -69
  57. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/refusalbench.py +168 -160
  58. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/simpleqa.py +44 -25
  59. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tau_bench.py +3 -103
  60. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolbench.py +3 -97
  61. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolemu.py +48 -182
  62. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +3 -0
  63. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +19 -1
  64. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aclue.py +1 -3
  65. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench.py +1 -3
  66. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench_hard.py +1 -3
  67. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/advanced.py +2 -4
  68. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aexams.py +1 -3
  69. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrimmlu.py +1 -3
  70. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrixnli.py +2 -2
  71. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabculture.py +1 -3
  72. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic.py +1 -3
  73. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_exams.py +1 -3
  74. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_complete.py +1 -3
  75. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_light.py +1 -3
  76. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabicmmlu.py +1 -3
  77. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aradice.py +1 -3
  78. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc.py +1 -3
  79. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +1 -2
  80. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +1 -2
  81. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +2 -2
  82. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +2 -2
  83. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/babi.py +36 -2
  84. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench.py +1 -3
  85. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bbq.py +1 -3
  86. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/belebele.py +1 -3
  87. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/benchmarks.py +1 -3
  88. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bertaqa.py +1 -3
  89. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhs.py +1 -3
  90. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhtc.py +3 -5
  91. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp.py +1 -3
  92. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp_nl.py +1 -3
  93. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +22 -5
  94. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/c4.py +1 -3
  95. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cabbq.py +1 -3
  96. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/careqa.py +1 -3
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench.py +1 -3
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalanqa.py +1 -3
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catcola.py +1 -3
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +10 -3
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval.py +1 -3
  102. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval_valid.py +1 -3
  103. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chain.py +1 -3
  104. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chartqa.py +1 -3
  105. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/claim.py +1 -3
  106. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/click.py +1 -3
  107. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cmmlu.py +1 -3
  108. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cnn.py +1 -3
  109. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cocoteros.py +1 -3
  110. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coedit.py +1 -3
  111. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense.py +1 -3
  112. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense_qa.py +1 -3
  113. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +2 -2
  114. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copal_id.py +1 -3
  115. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +3 -4
  116. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/csatqa.py +1 -3
  117. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cycle.py +1 -3
  118. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darija_bench.py +1 -3
  119. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijahellaswag.py +2 -6
  120. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijammlu.py +1 -3
  121. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/dbpedia.py +1 -3
  122. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/discrim_eval.py +1 -3
  123. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/doc.py +1 -3
  124. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +2 -2
  125. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/epec.py +1 -3
  126. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq.py +1 -3
  127. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench.py +1 -3
  128. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_ca.py +1 -3
  129. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_es.py +1 -3
  130. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/esbbq.py +1 -3
  131. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ethics.py +1 -3
  132. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus.py +1 -3
  133. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_exams.py +1 -3
  134. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_proficiency.py +1 -3
  135. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_reading.py +1 -3
  136. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_trivia.py +1 -3
  137. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_llm.py +1 -3
  138. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/financial.py +1 -3
  139. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/flan.py +1 -3
  140. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench.py +1 -3
  141. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench.py +1 -3
  142. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gaokao.py +2 -2
  143. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glianorex.py +1 -3
  144. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_mmlu.py +1 -3
  145. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_piqa.py +1 -3
  146. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpt3.py +1 -3
  147. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/groundcocoa.py +1 -3
  148. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/haerae.py +1 -3
  149. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +2 -2
  150. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +2 -2
  151. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_ethics.py +5 -9
  152. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_math.py +63 -16
  153. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/histoires_morales.py +1 -3
  154. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hrm8k.py +1 -3
  155. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval_infilling.py +1 -3
  156. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/icelandic_winogrande.py +1 -3
  157. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse.py +1 -3
  158. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse_scaling.py +1 -3
  159. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ja.py +1 -3
  160. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard.py +1 -3
  161. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_mc.py +1 -1
  162. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu.py +1 -3
  163. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kobest.py +1 -3
  164. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kormedmcqa.py +5 -17
  165. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_cloze.py +1 -3
  166. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual.py +1 -3
  167. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/law.py +1 -3
  168. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/leaderboard.py +1 -3
  169. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lingoly.py +1 -3
  170. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/llama3.py +1 -3
  171. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lm_syneval.py +1 -3
  172. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +2 -2
  173. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +2 -2
  174. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbench.py +1 -3
  175. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbenchv2.py +1 -3
  176. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mastermind.py +2 -4
  177. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +2 -2
  178. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/med_concepts_qa.py +2 -4
  179. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meddialog.py +1 -3
  180. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medical.py +1 -3
  181. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medmcqa.py +1 -3
  182. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +2 -2
  183. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mela.py +2 -2
  184. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/metabench.py +1 -3
  185. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/minerva_math.py +1 -3
  186. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu.py +1 -3
  187. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlusr.py +3 -4
  188. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +2 -2
  189. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multiblimp.py +2 -5
  190. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +2 -2
  191. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +2 -2
  192. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/non.py +1 -3
  193. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval.py +1 -3
  194. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_exact.py +1 -3
  195. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_gen_exact.py +1 -3
  196. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc.py +4 -8
  197. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc_log_likelihoods.py +4 -8
  198. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/nq_open.py +2 -2
  199. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_arc_multilingual.py +1 -3
  200. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_hellaswag_multilingual.py +1 -3
  201. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_mmlu_multilingual.py +1 -3
  202. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_truthfulqa_multilingual.py +2 -5
  203. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/olaph.py +1 -3
  204. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +2 -2
  205. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/option.py +1 -3
  206. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafraseja.py +1 -3
  207. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafrases.py +1 -3
  208. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws.py +1 -3
  209. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws_x.py +1 -3
  210. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +2 -2
  211. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/persona.py +1 -3
  212. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/phrases.py +1 -3
  213. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pile.py +1 -3
  214. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +2 -2
  215. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench.py +1 -3
  216. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prompt.py +1 -3
  217. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +2 -2
  218. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +2 -2
  219. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +2 -2
  220. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +2 -2
  221. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper_bool.py +2 -2
  222. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +2 -2
  223. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnlieu.py +1 -3
  224. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +2 -2
  225. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +2 -2
  226. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/random.py +1 -3
  227. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +2 -2
  228. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/reversed.py +1 -3
  229. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +2 -2
  230. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ruler.py +1 -3
  231. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +2 -2
  232. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/score.py +1 -3
  233. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls.py +1 -3
  234. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls_mc.py +1 -3
  235. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/self.py +1 -3
  236. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue.py +1 -3
  237. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue_rte.py +2 -1
  238. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/siqa.py +4 -7
  239. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +2 -2
  240. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench.py +1 -3
  241. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/storycloze.py +2 -6
  242. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/summarization.py +1 -3
  243. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super.py +1 -3
  244. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super_glue.py +1 -3
  245. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +2 -2
  246. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swde.py +1 -3
  247. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sycophancy.py +1 -3
  248. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/t0.py +1 -3
  249. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/teca.py +1 -3
  250. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyarc.py +1 -3
  251. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinybenchmarks.py +1 -3
  252. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinygsm8k.py +1 -3
  253. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyhellaswag.py +1 -3
  254. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinymmlu.py +1 -3
  255. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinytruthfulqa.py +1 -3
  256. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinywinogrande.py +1 -3
  257. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tmmluplus.py +1 -3
  258. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +2 -2
  259. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa.py +1 -3
  260. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +9 -4
  261. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +1 -3
  262. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turblimp_core.py +1 -3
  263. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu.py +1 -3
  264. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu_mc.py +0 -2
  265. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/unscramble.py +1 -3
  266. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/vaxx.py +2 -2
  267. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +2 -2
  268. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +3 -4
  269. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +2 -2
  270. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmdp.py +1 -3
  271. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +2 -2
  272. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +2 -2
  273. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc273.py +1 -3
  274. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xcopa.py +1 -3
  275. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xlsum.py +1 -3
  276. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +2 -2
  277. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xquad.py +2 -4
  278. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +2 -3
  279. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +2 -2
  280. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/zhoblimp.py +1 -3
  281. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +173 -6
  282. wisent/core/data_loaders/loaders/lm_loader.py +12 -1
  283. wisent/core/geometry_runner.py +995 -0
  284. wisent/core/geometry_search_space.py +237 -0
  285. wisent/core/hyperparameter_optimizer.py +1 -1
  286. wisent/core/main.py +3 -0
  287. wisent/core/models/core/atoms.py +5 -3
  288. wisent/core/models/wisent_model.py +1 -1
  289. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
  290. wisent/core/parser_arguments/check_linearity_parser.py +12 -2
  291. wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +2 -2
  292. wisent/core/parser_arguments/generate_vector_from_task_parser.py +6 -13
  293. wisent/core/parser_arguments/geometry_search_parser.py +61 -0
  294. wisent/core/parser_arguments/get_activations_parser.py +5 -14
  295. wisent/core/parser_arguments/main_parser.py +8 -0
  296. wisent/core/parser_arguments/train_unified_goodness_parser.py +2 -2
  297. wisent/core/steering.py +5 -3
  298. wisent/core/steering_methods/methods/hyperplane.py +2 -1
  299. wisent/core/synthetic/generators/nonsense_generator.py +30 -18
  300. wisent/core/trainers/steering_trainer.py +2 -2
  301. wisent/core/utils/device.py +27 -27
  302. wisent/core/utils/layer_combinations.py +70 -0
  303. wisent/examples/__init__.py +1 -0
  304. wisent/examples/scripts/__init__.py +1 -0
  305. wisent/examples/scripts/count_all_benchmarks.py +121 -0
  306. wisent/examples/scripts/discover_directions.py +469 -0
  307. wisent/examples/scripts/extract_benchmark_info.py +71 -0
  308. wisent/examples/scripts/search_all_short_names.py +31 -0
  309. wisent/examples/scripts/test_all_benchmarks.py +138 -0
  310. wisent/examples/scripts/test_all_benchmarks_new.py +28 -0
  311. wisent/examples/scripts/test_contrastive_pairs_all_supported.py +230 -0
  312. wisent/examples/scripts/test_nonsense_baseline.py +261 -0
  313. wisent/examples/scripts/test_one_benchmark.py +324 -0
  314. wisent/examples/scripts/test_one_coding_benchmark.py +293 -0
  315. wisent/parameters/lm_eval/broken_in_lm_eval.json +179 -2
  316. wisent/parameters/lm_eval/category_directions.json +137 -0
  317. wisent/parameters/lm_eval/repair_plan.json +282 -0
  318. wisent/parameters/lm_eval/weak_contrastive_pairs.json +38 -0
  319. wisent/parameters/lm_eval/working_benchmarks.json +206 -0
  320. wisent/parameters/lm_eval/working_benchmarks_categorized.json +236 -0
  321. wisent/tests/test_detector_accuracy.py +1 -1
  322. wisent/tests/visualize_geometry.py +1 -1
  323. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/METADATA +5 -1
  324. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/RECORD +328 -358
  325. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/browsecomp.py +0 -245
  326. wisent/examples/contrastive_pairs/humanization_human_vs_ai.json +0 -2112
  327. wisent/examples/scripts/1/test_basqueglue_evaluation.json +0 -51
  328. wisent/examples/scripts/1/test_basqueglue_pairs.json +0 -14
  329. wisent/examples/scripts/1/test_bec2016eu_evaluation.json +0 -51
  330. wisent/examples/scripts/1/test_bec2016eu_pairs.json +0 -14
  331. wisent/examples/scripts/1/test_belebele_evaluation.json +0 -51
  332. wisent/examples/scripts/1/test_belebele_pairs.json +0 -14
  333. wisent/examples/scripts/1/test_benchmarks_evaluation.json +0 -51
  334. wisent/examples/scripts/1/test_benchmarks_pairs.json +0 -14
  335. wisent/examples/scripts/1/test_bertaqa_evaluation.json +0 -51
  336. wisent/examples/scripts/1/test_bertaqa_pairs.json +0 -14
  337. wisent/examples/scripts/1/test_bhtc_v2_evaluation.json +0 -30
  338. wisent/examples/scripts/1/test_bhtc_v2_pairs.json +0 -8
  339. wisent/examples/scripts/1/test_boolq-seq2seq_evaluation.json +0 -30
  340. wisent/examples/scripts/1/test_boolq-seq2seq_pairs.json +0 -8
  341. wisent/examples/scripts/1/test_cabreu_evaluation.json +0 -30
  342. wisent/examples/scripts/1/test_cabreu_pairs.json +0 -8
  343. wisent/examples/scripts/1/test_careqa_en_evaluation.json +0 -30
  344. wisent/examples/scripts/1/test_careqa_en_pairs.json +0 -8
  345. wisent/examples/scripts/1/test_careqa_evaluation.json +0 -30
  346. wisent/examples/scripts/1/test_careqa_pairs.json +0 -8
  347. wisent/examples/scripts/1/test_catalanqa_evaluation.json +0 -30
  348. wisent/examples/scripts/1/test_catalanqa_pairs.json +0 -8
  349. wisent/examples/scripts/1/test_catcola_evaluation.json +0 -30
  350. wisent/examples/scripts/1/test_catcola_pairs.json +0 -8
  351. wisent/examples/scripts/1/test_chartqa_evaluation.json +0 -30
  352. wisent/examples/scripts/1/test_chartqa_pairs.json +0 -8
  353. wisent/examples/scripts/1/test_claim_stance_topic_evaluation.json +0 -30
  354. wisent/examples/scripts/1/test_claim_stance_topic_pairs.json +0 -8
  355. wisent/examples/scripts/1/test_cnn_dailymail_evaluation.json +0 -30
  356. wisent/examples/scripts/1/test_cnn_dailymail_pairs.json +0 -8
  357. wisent/examples/scripts/1/test_cocoteros_es_evaluation.json +0 -30
  358. wisent/examples/scripts/1/test_cocoteros_es_pairs.json +0 -8
  359. wisent/examples/scripts/1/test_coedit_gec_evaluation.json +0 -30
  360. wisent/examples/scripts/1/test_coedit_gec_pairs.json +0 -8
  361. wisent/examples/scripts/1/test_cola_evaluation.json +0 -30
  362. wisent/examples/scripts/1/test_cola_pairs.json +0 -8
  363. wisent/examples/scripts/1/test_coqcat_evaluation.json +0 -30
  364. wisent/examples/scripts/1/test_coqcat_pairs.json +0 -8
  365. wisent/examples/scripts/1/test_dbpedia_14_evaluation.json +0 -30
  366. wisent/examples/scripts/1/test_dbpedia_14_pairs.json +0 -8
  367. wisent/examples/scripts/1/test_epec_koref_bin_evaluation.json +0 -30
  368. wisent/examples/scripts/1/test_epec_koref_bin_pairs.json +0 -8
  369. wisent/examples/scripts/1/test_ethos_binary_evaluation.json +0 -30
  370. wisent/examples/scripts/1/test_ethos_binary_pairs.json +0 -8
  371. wisent/examples/scripts/2/test_afrimgsm_direct_amh_evaluation.json +0 -30
  372. wisent/examples/scripts/2/test_afrimgsm_direct_amh_pairs.json +0 -8
  373. wisent/examples/scripts/2/test_afrimmlu_direct_amh_evaluation.json +0 -30
  374. wisent/examples/scripts/2/test_afrimmlu_direct_amh_pairs.json +0 -8
  375. wisent/examples/scripts/2/test_afrixnli_en_direct_amh_evaluation.json +0 -30
  376. wisent/examples/scripts/2/test_afrixnli_en_direct_amh_pairs.json +0 -8
  377. wisent/examples/scripts/2/test_arc_ar_evaluation.json +0 -30
  378. wisent/examples/scripts/2/test_arc_ar_pairs.json +0 -8
  379. wisent/examples/scripts/2/test_atis_evaluation.json +0 -30
  380. wisent/examples/scripts/2/test_atis_pairs.json +0 -8
  381. wisent/examples/scripts/2/test_babi_evaluation.json +0 -30
  382. wisent/examples/scripts/2/test_babi_pairs.json +0 -8
  383. wisent/examples/scripts/2/test_babilong_evaluation.json +0 -30
  384. wisent/examples/scripts/2/test_babilong_pairs.json +0 -8
  385. wisent/examples/scripts/2/test_bangla_mmlu_evaluation.json +0 -30
  386. wisent/examples/scripts/2/test_bangla_mmlu_pairs.json +0 -8
  387. wisent/examples/scripts/2/test_basque-glue_pairs.json +0 -14
  388. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/WHEEL +0 -0
  389. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/entry_points.txt +0 -0
  390. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/licenses/LICENSE +0 -0
  391. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,469 @@
1
+ """
2
+ Discover unified directions for skill categories (coding, math, hallucination, etc.)
3
+
4
+ Uses GeometrySearchSpace to test all models, strategies, and layer combinations.
5
+ For each category, determines if a unified direction exists.
6
+
7
+ Usage:
8
+ # Run for all models (sequentially)
9
+ python -m wisent.examples.scripts.discover_directions
10
+
11
+ # Run for a specific model (for parallel execution)
12
+ python -m wisent.examples.scripts.discover_directions --model meta-llama/Llama-3.2-1B-Instruct
13
+ """
14
+
15
+ import argparse
16
+ import json
17
+ import subprocess
18
+ from pathlib import Path
19
+ from typing import Dict, List, Any, Optional
20
+ from dataclasses import dataclass, field, asdict
21
+
22
+ S3_BUCKET = "wisent-bucket"
23
+ S3_PREFIX = "direction_discovery"
24
+
25
+
26
def s3_sync_download(model_name: str, output_dir: Path) -> None:
    """Best-effort download of a model's existing results from S3.

    Runs ``aws s3 sync`` from the model's prefix under
    ``s3://S3_BUCKET/S3_PREFIX/`` into ``output_dir``. Slashes in
    ``model_name`` are replaced with underscores to form the prefix.

    This function never raises: any failure is printed and the caller simply
    continues without the previously stored results.

    Args:
        model_name: Model identifier used to derive the S3 prefix.
        output_dir: Local directory that receives the synced files.
    """
    model_prefix = model_name.replace('/', '_')
    s3_path = f"s3://{S3_BUCKET}/{S3_PREFIX}/{model_prefix}/"
    try:
        completed = subprocess.run(
            ["aws", "s3", "sync", s3_path, str(output_dir), "--quiet"],
            check=False,
            capture_output=True,
        )
    except Exception as e:
        # Typically the aws executable is missing; resume is optional.
        print(f"S3 download skipped: {e}")
        return

    # check=False means a failing sync does not raise, so the exit status has
    # to be inspected before claiming success.
    if completed.returncode == 0:
        print(f"Synced existing results from S3: {s3_path}")
    else:
        detail = completed.stderr.decode(errors="replace").strip()
        print(
            f"S3 download skipped: aws exited with status "
            f"{completed.returncode}: {detail}"
        )
39
+
40
+
41
def s3_upload_file(local_path: Path, model_name: str) -> None:
    """Copy one local file into the model's S3 prefix via ``aws s3 cp``.

    The destination key is the file's own name under the model prefix
    (slashes in ``model_name`` become underscores). The outcome is printed;
    failures are reported and swallowed rather than raised.
    """
    prefix = model_name.replace('/', '_')
    s3_path = f"s3://{S3_BUCKET}/{S3_PREFIX}/{prefix}/{local_path.name}"
    command = ["aws", "s3", "cp", str(local_path), s3_path, "--quiet"]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(command, check=True, capture_output=True)
        print(f" Uploaded to S3: {s3_path}")
    except Exception as e:
        print(f" S3 upload failed: {e}")
54
+
55
+ from wisent.core.geometry_search_space import (
56
+ GeometrySearchSpace,
57
+ GeometrySearchConfig,
58
+ )
59
+ from wisent.core.geometry_runner import (
60
+ GeometryRunner,
61
+ GeometrySearchResults,
62
+ GeometryTestResult,
63
+ )
64
+ from wisent.core.contrastive_pairs.diagnostics.control_vectors import (
65
+ GeometryAnalysisConfig,
66
+ StructureType,
67
+ )
68
+ from wisent.core.models.wisent_model import WisentModel
69
+
70
+
71
def load_categorized_benchmarks() -> Dict[str, List[str]]:
    """Return the parsed contents of ``working_benchmarks_categorized.json``.

    The file is looked up in ``parameters/lm_eval`` three directory levels
    above this script.
    """
    package_root = Path(__file__).parent.parent.parent
    json_path = package_root / "parameters" / "lm_eval" / "working_benchmarks_categorized.json"
    with open(json_path) as handle:
        benchmarks_by_category = json.load(handle)
    return benchmarks_by_category
76
+
77
+
78
def load_category_directions() -> Dict[str, Dict]:
    """Return the parsed contents of ``category_directions.json``.

    The file is looked up in ``parameters/lm_eval`` three directory levels
    above this script.
    """
    package_root = Path(__file__).parent.parent.parent
    json_path = package_root / "parameters" / "lm_eval" / "category_directions.json"
    with open(json_path) as handle:
        directions = json.load(handle)
    return directions
83
+
84
+
85
@dataclass
class CategoryResult:
    """Aggregated analysis outcome for one benchmark category.

    Field groups:

    * Identification: ``category``, ``description``, ``benchmarks_tested``,
      ``total_tests``.
    * Step 1, signal detection: ``avg_signal_strength`` (MLP CV accuracy) and
      ``signal_exists`` (``avg_signal_strength > 0.6``).
    * Step 2, linearity: ``avg_linear_probe_accuracy`` (linear probe CV
      accuracy) and ``is_linear`` (the signal is linear, so CAA will work).
    * Nonlinear signal metrics: k-NN CV accuracy, Maximum Mean Discrepancy,
      local intrinsic dimension of the positive and negative class, max
      Fisher ratio, and density ratio.
    * Step 3, geometry details (only meaningful when ``signal_exists``):
      structure counts and percentages, dominant structure, average linear
      score and average Cohen's d.
    * Final recommendation: ``recommendation`` (NO_SIGNAL, CAA, or
      NONLINEAR), ``has_unified_direction`` and the optional ``best_config``.
    """

    # --- identification ---
    category: str
    description: str
    benchmarks_tested: List[str]
    total_tests: int

    # --- step 1: signal detection ---
    avg_signal_strength: float
    signal_exists: bool

    # --- step 2: linearity check ---
    avg_linear_probe_accuracy: float
    is_linear: bool

    # --- nonlinear signal metrics ---
    avg_knn_accuracy_k10: float
    avg_mmd_rbf: float
    avg_local_dim_pos: float
    avg_local_dim_neg: float
    avg_fisher_max: float
    avg_density_ratio: float

    # --- step 3: geometry details ---
    structure_distribution: Dict[str, int]
    structure_percentages: Dict[str, float]
    dominant_structure: str
    avg_linear_score: float
    avg_cohens_d: float

    # --- final recommendation ---
    recommendation: str
    has_unified_direction: bool
    best_config: Optional[Dict[str, Any]] = None
120
+
121
+
122
@dataclass
class DiscoveryResults:
    """Per-model container mapping category names to their analysis results."""
    model: str
    categories: Dict[str, CategoryResult] = field(default_factory=dict)

    def summary(self) -> str:
        """Build a text report that groups categories by outcome.

        Categories are split into three buckets: linear signal (CAA ready),
        signal without linearity, and no signal. The CAA-ready bucket is
        listed by descending average signal strength; the other two keep
        insertion order. Empty buckets are omitted.
        """
        report = [
            f"Model: {self.model}",
            f"Categories analyzed: {len(self.categories)}",
            "",
        ]

        linear_names = []
        nonlinear_names = []
        silent_names = []
        for name, cat in self.categories.items():
            if cat.signal_exists:
                bucket = linear_names if cat.is_linear else nonlinear_names
            else:
                bucket = silent_names
            bucket.append(name)

        if linear_names:
            report.append(f"CAA READY - Linear signal ({len(linear_names)}):")
            ranked = sorted(
                linear_names,
                key=lambda n: self.categories[n].avg_signal_strength,
                reverse=True,
            )
            for name in ranked:
                cat = self.categories[name]
                report.append(f" {name}: signal={cat.avg_signal_strength:.2f}, linear={cat.avg_linear_probe_accuracy:.2f}, kNN={cat.avg_knn_accuracy_k10:.2f}")

        if nonlinear_names:
            report.append(f"\nNONLINEAR - Need different method ({len(nonlinear_names)}):")
            for name in nonlinear_names:
                cat = self.categories[name]
                report.append(f" {name}: signal={cat.avg_signal_strength:.2f}, linear={cat.avg_linear_probe_accuracy:.2f}, kNN={cat.avg_knn_accuracy_k10:.2f}, MMD={cat.avg_mmd_rbf:.3f}")

        if silent_names:
            report.append(f"\nNO SIGNAL ({len(silent_names)}):")
            for name in silent_names:
                cat = self.categories[name]
                report.append(f" {name}: signal={cat.avg_signal_strength:.2f}, kNN={cat.avg_knn_accuracy_k10:.2f}")

        return "\n".join(report)
167
+
168
+
169
def analyze_category_results(results: GeometrySearchResults, category: str, description: str, benchmarks: List[str]) -> CategoryResult:
    """Condense a category's geometry search results into a CategoryResult.

    With no individual results, a placeholder is returned whose
    recommendation is "NO_RESULTS". Otherwise every metric is the plain mean
    over ``results.results`` and the decision rules are:

    * signal exists when the mean signal strength exceeds 0.6;
    * the signal is linear when it exists, the mean linear probe accuracy
      exceeds 0.6, and that accuracy trails the signal strength by < 0.15;
    * recommendation is "NO_SIGNAL", "CAA" (linear) or "NONLINEAR".

    ``best_config`` describes the single result with the highest signal
    strength.
    """
    if not results.results:
        return CategoryResult(
            category=category,
            description=description,
            benchmarks_tested=benchmarks,
            total_tests=0,
            avg_signal_strength=0.5,
            signal_exists=False,
            avg_linear_probe_accuracy=0.5,
            is_linear=False,
            avg_knn_accuracy_k10=0.5,
            avg_mmd_rbf=0.0,
            avg_local_dim_pos=0.0,
            avg_local_dim_neg=0.0,
            avg_fisher_max=0.0,
            avg_density_ratio=1.0,
            structure_distribution={},
            structure_percentages={},
            dominant_structure="error",
            avg_linear_score=0.0,
            avg_cohens_d=0.0,
            recommendation="NO_RESULTS",
            has_unified_direction=False,
        )

    # Structure histogram and its percentage view.
    dist = results.get_structure_distribution()
    total = sum(dist.values())
    if total > 0:
        percentages = {k: 100 * v / total for k, v in dist.items()}
    else:
        percentages = {}
    if dist:
        dominant = max(dist.items(), key=lambda x: x[1])[0]
    else:
        dominant = "unknown"

    def mean_of(attr: str) -> float:
        # Arithmetic mean of one attribute across all individual results.
        return sum(getattr(r, attr) for r in results.results) / len(results.results)

    # Step 1: signal detection.
    avg_signal_strength = mean_of("signal_strength")
    signal_exists = avg_signal_strength > 0.6

    # Step 2: linearity check against the MLP accuracy.
    avg_linear_probe_accuracy = mean_of("linear_probe_accuracy")
    is_linear = signal_exists and avg_linear_probe_accuracy > 0.6 and (avg_signal_strength - avg_linear_probe_accuracy) < 0.15

    # Step 2b: nonlinear signal metrics.
    avg_knn_accuracy_k10 = mean_of("knn_accuracy_k10")
    avg_mmd_rbf = mean_of("mmd_rbf")
    avg_local_dim_pos = mean_of("local_dim_pos")
    avg_local_dim_neg = mean_of("local_dim_neg")
    avg_fisher_max = mean_of("fisher_max")
    avg_density_ratio = mean_of("density_ratio")

    # Step 3: geometry details.
    avg_linear_score = mean_of("linear_score")
    avg_cohens_d = mean_of("cohens_d")

    # Final recommendation.
    if signal_exists:
        recommendation = "CAA" if is_linear else "NONLINEAR"
    else:
        recommendation = "NO_SIGNAL"

    # Highest signal strength wins; ties resolve to the earliest result.
    ranked = sorted(results.results, key=lambda r: r.signal_strength, reverse=True)
    best_config = None
    if ranked:
        top = ranked[0]
        best_config = {
            "benchmark": top.benchmark,
            "strategy": top.strategy,
            "layers": top.layers,
            "signal_strength": top.signal_strength,
            "linear_probe_accuracy": top.linear_probe_accuracy,
            "is_linear": top.is_linear,
        }

    return CategoryResult(
        category=category,
        description=description,
        benchmarks_tested=benchmarks,
        total_tests=total,
        avg_signal_strength=avg_signal_strength,
        signal_exists=signal_exists,
        avg_linear_probe_accuracy=avg_linear_probe_accuracy,
        is_linear=is_linear,
        avg_knn_accuracy_k10=avg_knn_accuracy_k10,
        avg_mmd_rbf=avg_mmd_rbf,
        avg_local_dim_pos=avg_local_dim_pos,
        avg_local_dim_neg=avg_local_dim_neg,
        avg_fisher_max=avg_fisher_max,
        avg_density_ratio=avg_density_ratio,
        structure_distribution=dist,
        structure_percentages=percentages,
        dominant_structure=dominant,
        avg_linear_score=avg_linear_score,
        avg_cohens_d=avg_cohens_d,
        recommendation=recommendation,
        # A unified direction exists exactly when the signal is linear.
        has_unified_direction=is_linear,
        best_config=best_config,
    )
274
+
275
+
276
def run_discovery_for_model(
    model_name: str,
    output_dir: Path,
    samples_per_benchmark: Optional[int] = None,
) -> "Optional[DiscoveryResults]":
    """Run discovery for a single model with resume support.

    Existing results are first synced from S3 into ``output_dir``. A category
    counts as completed when its per-category JSON file exists and is larger
    than 100 bytes; such categories are skipped. Each remaining category is
    searched, analyzed, saved locally and uploaded to S3 immediately. A
    category that raises is reported and skipped so the others still run.
    Finally the per-model summary JSON is merged with any existing one and
    uploaded.

    Args:
        model_name: Model identifier passed to ``WisentModel``.
        output_dir: Directory for per-category and summary JSON files.
        samples_per_benchmark: Pairs per benchmark to use for every category.
            When ``None`` the default from a fresh ``GeometrySearchSpace()``
            is used, which is what this function always did before.
            NOTE(review): this parameter exists because the value configured
            by ``run_discovery`` lives on a different ``GeometrySearchSpace``
            instance; this assumes instances do not share one config object.

    Returns:
        The results for the categories processed in this call, or ``None``
        when every category was already completed or the model failed to
        load.
    """
    categories = load_categorized_benchmarks()
    category_info = load_category_directions()
    search_space = GeometrySearchSpace()
    if samples_per_benchmark is None:
        pairs_per_benchmark = search_space.config.pairs_per_benchmark
    else:
        pairs_per_benchmark = samples_per_benchmark

    print(f"\n{'=' * 70}")
    print(f"MODEL: {model_name}")
    print("=" * 70)

    # Download existing results from S3 for resume
    s3_sync_download(model_name, output_dir)

    # Check which categories are already done
    model_prefix = model_name.replace('/', '_')
    completed_categories = set()
    for cat_name in categories:
        cat_file = output_dir / f"{model_prefix}_{cat_name}.json"
        # The size threshold filters out empty or truncated files.
        if cat_file.exists() and cat_file.stat().st_size > 100:
            completed_categories.add(cat_name)
            print(f" [SKIP] {cat_name} already completed")

    remaining = [c for c in categories if c not in completed_categories]
    if not remaining:
        print("All categories already completed!")
        return None

    # Report progress against the real category count, not a fixed number.
    print(f"\nCompleted: {len(completed_categories)}/{len(categories)}, Remaining: {len(remaining)}")
    print(f"Categories to run: {remaining}")

    try:
        model = WisentModel(model_name, device="cuda")
        print(f"Loaded: {model.num_layers} layers, hidden={model.hidden_size}")
    except Exception as e:
        print(f"Failed to load model: {e}")
        return None

    cache_dir = f"/tmp/wisent_direction_cache_{model_prefix}"

    model_results = DiscoveryResults(model=model_name)

    # Run for each remaining category
    for cat_name in remaining:
        benchmarks = categories[cat_name]
        print(f"\n{'-' * 50}")
        print(f"Category: {cat_name.upper()} ({len(benchmarks)} benchmarks)")
        print("-" * 50)

        info = category_info.get(cat_name, {})
        description = info.get("description", "")
        print(f"Description: {description}")

        # Create search space for this category
        cat_config = GeometrySearchConfig(
            pairs_per_benchmark=pairs_per_benchmark,
            max_layer_combo_size=search_space.config.max_layer_combo_size,
            cache_dir=cache_dir,
        )

        cat_space = GeometrySearchSpace(
            models=[model_name],
            strategies=search_space.strategies,
            benchmarks=benchmarks,
            config=cat_config,
        )

        # Run geometry search
        runner = GeometryRunner(cat_space, model, cache_dir=cache_dir)

        try:
            results = runner.run(show_progress=True)
            cat_result = analyze_category_results(results, cat_name, description, benchmarks)
            model_results.categories[cat_name] = cat_result

            print(f"\n Step 1 - Signal: {cat_result.avg_signal_strength:.3f} ({'EXISTS' if cat_result.signal_exists else 'NONE'})")
            print(f" Step 2 - Linear: {cat_result.avg_linear_probe_accuracy:.3f} ({'YES' if cat_result.is_linear else 'NO'})")
            print(f" Recommendation: {cat_result.recommendation}")

            # Save per-category results immediately
            cat_file = output_dir / f"{model_prefix}_{cat_name}.json"
            results.save(str(cat_file))
            print(f" Saved: {cat_file}")

            # Upload to S3 immediately for durability
            s3_upload_file(cat_file, model_name)

        except Exception as e:
            # One failing category must not abort the remaining ones.
            print(f" ERROR: {e}")
            continue

    # Save/update model summary (merge with existing if any)
    summary_file = output_dir / f"{model_prefix}_summary.json"

    # Load existing summary if present
    existing_categories = {}
    if summary_file.exists():
        with open(summary_file) as f:
            existing = json.load(f)
        existing_categories = existing.get("categories", {})

    # Merge new results; entries from this run override older ones.
    all_categories = {**existing_categories, **{k: asdict(v) for k, v in model_results.categories.items()}}

    with open(summary_file, "w") as f:
        json.dump({
            "model": model_name,
            "categories": all_categories
        }, f, indent=2)

    # Upload summary to S3
    s3_upload_file(summary_file, model_name)

    print(f"\n{model_results.summary()}")

    # Drop the local reference to the model before returning.
    del model

    return model_results


def run_discovery(model_filter: Optional[str] = None, samples_per_benchmark: int = 50):
    """Run full category direction discovery.

    Args:
        model_filter: When given, only this model is tested and no overall
            summary file is written; otherwise every model of the default
            search space is tested in sequence.
        samples_per_benchmark: Pairs per benchmark, forwarded to each
            per-model run.
    """
    print("=" * 70)
    print("CATEGORY DIRECTION DISCOVERY")
    print("=" * 70)

    # Load categories
    categories = load_categorized_benchmarks()

    print(f"Categories: {list(categories.keys())}")
    print(f"Total benchmarks: {sum(len(b) for b in categories.values())}")

    # Get search space config
    search_space = GeometrySearchSpace()
    search_space.config.pairs_per_benchmark = samples_per_benchmark

    # Filter models if specified
    if model_filter:
        models_to_test = [model_filter]
    else:
        models_to_test = search_space.models

    print(f"\nModels to test: {models_to_test}")
    print(f"Strategies: {[s.value for s in search_space.strategies]}")
    print(f"Pairs per benchmark: {search_space.config.pairs_per_benchmark}")

    # Output directory
    output_dir = Path("/tmp/direction_discovery")
    output_dir.mkdir(parents=True, exist_ok=True)

    all_model_results = {}

    # Run for each model
    for model_name in models_to_test:
        # Forward the sample count explicitly: the per-model run builds its
        # own search space and would otherwise fall back to the default.
        model_results = run_discovery_for_model(
            model_name,
            output_dir,
            samples_per_benchmark=samples_per_benchmark,
        )
        if model_results:
            all_model_results[model_name] = model_results

    # Save overall summary (only if running all models)
    if not model_filter and all_model_results:
        overall_file = output_dir / "discovery_summary.json"
        overall = {
            "models": list(all_model_results.keys()),
            "categories": list(categories.keys()),
            "results": {}
        }
        for model_name, results in all_model_results.items():
            overall["results"][model_name] = {
                cat: {
                    "has_unified_direction": r.has_unified_direction,
                    "dominant_structure": r.dominant_structure,
                    "recommendation": r.recommendation,
                    "avg_linear_score": r.avg_linear_score,
                }
                for cat, r in results.categories.items()
            }

        with open(overall_file, "w") as f:
            json.dump(overall, f, indent=2)

    print(f"\n{'=' * 70}")
    print("DISCOVERY COMPLETE")
    print("=" * 70)
    print(f"Results saved to: {output_dir}")
461
+
462
+
463
if __name__ == "__main__":
    # Command-line entry point: parse the two options and start discovery.
    cli = argparse.ArgumentParser(description="Discover unified directions for skill categories")
    cli.add_argument(
        "--model",
        type=str,
        default=None,
        help="Specific model to test (for parallel execution)",
    )
    cli.add_argument(
        "--samples-per-benchmark",
        type=int,
        default=50,
        help="Number of samples per benchmark (default: 50)",
    )
    options = cli.parse_args()

    run_discovery(
        model_filter=options.model,
        samples_per_benchmark=options.samples_per_benchmark,
    )
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env python3
2
+ """Extract benchmark information from README files."""
3
+
4
+ import json
5
+ import re
6
+ from pathlib import Path
7
+
8
def extract_info_from_readme(readme_path):
    """Extract title, description, paper, homepage from README.

    Args:
        readme_path: ``pathlib.Path`` of a Markdown README, read as UTF-8.

    Returns:
        A dict with these string values:

        * ``name``: text of the first ``# `` heading, or the file stem when
          there is none.
        * ``paper``: first URL that follows "Paper" or "Abstract" on the
          same line, or "".
        * ``homepage``: first URL that follows "Homepage" on the same line,
          or "".
        * ``description``: text after an "Abstract" marker up to the next
          blank line or heading, with whitespace collapsed; otherwise the
          first paragraph longer than 50 characters. Limited to 500
          characters; "" when neither is found.
    """
    # Decode explicitly so the result does not depend on the locale default.
    content = readme_path.read_text(encoding="utf-8")

    info = {
        "name": readme_path.stem,
        "description": "",
        "paper": "",
        "homepage": ""
    }

    # Extract title (first # heading)
    title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
    if title_match:
        info["name"] = title_match.group(1).strip()

    # Extract paper link. Only the URL is captured; the label text that
    # precedes it on the line is matched but not stored.
    paper_match = re.search(r'(?:Paper|Abstract).*?(https?://[^\s\)]+)', content, re.IGNORECASE)
    if paper_match:
        info["paper"] = paper_match.group(1)

    # Extract homepage link, again keeping just the URL.
    homepage_match = re.search(r'Homepage.*?(https?://[^\s\)]+)', content, re.IGNORECASE)
    if homepage_match:
        info["homepage"] = homepage_match.group(1)

    # Extract description (first paragraph after title or abstract)
    desc_match = re.search(r'(?:Abstract|##\s*Abstract)[:\s]*(.+?)(?:\n\n|\n#)', content, re.DOTALL | re.IGNORECASE)
    if desc_match:
        desc = desc_match.group(1).strip()
        # Collapse line breaks and repeated spaces into single spaces.
        desc = re.sub(r'\s+', ' ', desc)
        info["description"] = desc[:500]  # Limit length
    else:
        # Try to get first substantial paragraph
        paragraphs = [p.strip() for p in content.split('\n\n') if len(p.strip()) > 50]
        if paragraphs:
            info["description"] = paragraphs[0][:500]

    return info
49
+
50
def main():
    """Collect README metadata for every benchmark into one JSON file.

    Every ``*.md`` file in the ``readmes`` directory next to this script is
    processed in sorted order, keyed by file stem, and the combined mapping
    is written to ``results/benchmark_descriptions.json``.
    """
    script_dir = Path(__file__).parent
    readmes_dir = script_dir / "readmes"
    output_file = script_dir / "results" / "benchmark_descriptions.json"

    # Make sure the results directory is there before writing into it.
    output_file.parent.mkdir(exist_ok=True)

    all_info = {}
    for readme_path in sorted(readmes_dir.glob("*.md")):
        benchmark_name = readme_path.stem
        all_info[benchmark_name] = extract_info_from_readme(readme_path)
        print(f"Processed {benchmark_name}")

    with open(output_file, 'w') as out:
        json.dump(all_info, out, indent=2)

    print(f"\nExtracted info for {len(all_info)} benchmarks")
    print(f"Output: {output_file}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env python3
2
+ """Search for all short task names that might match Tag."""
3
+
4
+ import sys
5
+ sys.path.insert(0, '/Users/lukaszbartoszcze/Documents/CodingProjects/Wisent/backends/wisent-open-source')
6
+
7
+ from lm_eval.tasks import TaskManager
8
+
9
def main():
    """Print task names from the lm_eval task index that could match "Tag".

    Three listings are printed: names of exactly three characters, names of
    three or four characters starting with "t" (case-insensitive), and the
    first 20 names that contain the letters t, a and g.
    """
    manager = TaskManager()
    names = manager.task_index.keys()

    # Names that are exactly three characters long.
    exact_three = sorted(name for name in names if len(name) == 3)
    print(f"Found {len(exact_three)} tasks with exactly 3 letters:")
    for task in exact_three:
        print(f" - {task}")

    # Names of three or four characters that begin with "t" in either case.
    short_t = sorted(
        name for name in names
        if name.lower().startswith('t') and 3 <= len(name) <= 4
    )
    print(f"\nFound {len(short_t)} tasks with 3-4 letters starting with 't':")
    for task in short_t:
        print(f" - {task}")

    # Names containing each of t, a and g somewhere (case-insensitive).
    # Only presence is tested; the letters may appear in any order.
    with_tag_letters = sorted(
        name for name in names
        if 't' in name.lower() and 'a' in name.lower() and 'g' in name.lower()
    )
    print(f"\nFound {len(with_tag_letters)} tasks containing t, a, and g:")
    for task in with_tag_letters[:20]:  # Show first 20
        print(f" - {task}")


if __name__ == "__main__":
    main()