wisent-0.7.379-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1720)
  1. wisent/__init__.py +64 -0
  2. wisent/cli.py +114 -0
  3. wisent/core/__init__.py +40 -0
  4. wisent/core/activations/__init__.py +26 -0
  5. wisent/core/activations/activations.py +97 -0
  6. wisent/core/activations/activations_collector.py +506 -0
  7. wisent/core/activations/core/__init__.py +0 -0
  8. wisent/core/activations/core/atoms.py +219 -0
  9. wisent/core/activations/prompt_construction_strategy.py +47 -0
  10. wisent/core/adapters/__init__.py +22 -0
  11. wisent/core/adapters/audio.py +616 -0
  12. wisent/core/adapters/base.py +420 -0
  13. wisent/core/adapters/multimodal.py +738 -0
  14. wisent/core/adapters/robotics.py +643 -0
  15. wisent/core/adapters/text.py +441 -0
  16. wisent/core/adapters/video.py +555 -0
  17. wisent/core/agent/__init__.py +1 -0
  18. wisent/core/agent/budget.py +644 -0
  19. wisent/core/agent/device_benchmarks.py +691 -0
  20. wisent/core/agent/diagnose/__init__.py +1 -0
  21. wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
  22. wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
  23. wisent/core/agent/diagnose/create_classifier.py +1155 -0
  24. wisent/core/agent/diagnose/response_diagnostics.py +273 -0
  25. wisent/core/agent/diagnose/select_classifiers.py +507 -0
  26. wisent/core/agent/diagnose/synthetic_classifier_option.py +755 -0
  27. wisent/core/agent/diagnose/tasks/__init__.py +33 -0
  28. wisent/core/agent/diagnose/tasks/task_manager.py +1453 -0
  29. wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
  30. wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
  31. wisent/core/agent/diagnose.py +249 -0
  32. wisent/core/agent/steer.py +215 -0
  33. wisent/core/agent/timeout.py +134 -0
  34. wisent/core/autonomous_agent.py +1158 -0
  35. wisent/core/benchmark_extractors.py +372 -0
  36. wisent/core/benchmark_registry.py +151 -0
  37. wisent/core/bigcode_extractors.py +26 -0
  38. wisent/core/bigcode_integration.py +886 -0
  39. wisent/core/branding.py +108 -0
  40. wisent/core/classifier/__init__.py +1 -0
  41. wisent/core/classifier/models/__init__.py +1 -0
  42. wisent/core/classifiers/__init__.py +1 -0
  43. wisent/core/classifiers/classifiers/__init__.py +0 -0
  44. wisent/core/classifiers/classifiers/core/__init__.py +0 -0
  45. wisent/core/classifiers/classifiers/core/atoms.py +748 -0
  46. wisent/core/classifiers/classifiers/models/__init__.py +0 -0
  47. wisent/core/classifiers/classifiers/models/logistic.py +29 -0
  48. wisent/core/classifiers/classifiers/models/mlp.py +47 -0
  49. wisent/core/classifiers/classifiers/rotator.py +137 -0
  50. wisent/core/classifiers/core/__init__.py +1 -0
  51. wisent/core/classifiers/models/__init__.py +1 -0
  52. wisent/core/classifiers/pipeline_steps/__init__.py +1 -0
  53. wisent/core/cli/__init__.py +26 -0
  54. wisent/core/cli/agent/__init__.py +15 -0
  55. wisent/core/cli/agent/apply_steering.py +192 -0
  56. wisent/core/cli/agent/evaluate_response.py +128 -0
  57. wisent/core/cli/agent/generate_synthetic_pairs.py +123 -0
  58. wisent/core/cli/agent/main.py +139 -0
  59. wisent/core/cli/agent/train_classifier.py +173 -0
  60. wisent/core/cli/check_linearity.py +126 -0
  61. wisent/core/cli/create_steering_vector.py +304 -0
  62. wisent/core/cli/diagnose_pairs.py +153 -0
  63. wisent/core/cli/diagnose_vectors.py +404 -0
  64. wisent/core/cli/estimate_unified_goodness_time.py +428 -0
  65. wisent/core/cli/evaluate_refusal.py +241 -0
  66. wisent/core/cli/evaluate_responses.py +926 -0
  67. wisent/core/cli/generate_humanization_pairs.py +128 -0
  68. wisent/core/cli/generate_pairs.py +175 -0
  69. wisent/core/cli/generate_pairs_from_task.py +108 -0
  70. wisent/core/cli/generate_responses.py +160 -0
  71. wisent/core/cli/generate_vector_from_synthetic.py +217 -0
  72. wisent/core/cli/generate_vector_from_task.py +248 -0
  73. wisent/core/cli/get_activations.py +192 -0
  74. wisent/core/cli/inference_config.py +84 -0
  75. wisent/core/cli/inference_config_cli.py +54 -0
  76. wisent/core/cli/modify_weights.py +660 -0
  77. wisent/core/cli/multi_steer.py +112 -0
  78. wisent/core/cli/optimization_cache.py +298 -0
  79. wisent/core/cli/optimize.py +621 -0
  80. wisent/core/cli/optimize_classification.py +473 -0
  81. wisent/core/cli/optimize_sample_size.py +390 -0
  82. wisent/core/cli/optimize_steering.py +3421 -0
  83. wisent/core/cli/optimize_weights.py +1287 -0
  84. wisent/core/cli/steering_method_trainer.py +641 -0
  85. wisent/core/cli/steering_search_space.py +508 -0
  86. wisent/core/cli/tasks.py +940 -0
  87. wisent/core/cli/train_unified_goodness.py +681 -0
  88. wisent/core/cli_logger.py +22 -0
  89. wisent/core/config_manager.py +1731 -0
  90. wisent/core/contrastive_pairs/__init__.py +15 -0
  91. wisent/core/contrastive_pairs/core/__init__.py +0 -0
  92. wisent/core/contrastive_pairs/core/atoms.py +45 -0
  93. wisent/core/contrastive_pairs/core/buliders.py +59 -0
  94. wisent/core/contrastive_pairs/core/pair.py +183 -0
  95. wisent/core/contrastive_pairs/core/response.py +153 -0
  96. wisent/core/contrastive_pairs/core/serialization.py +306 -0
  97. wisent/core/contrastive_pairs/core/set.py +192 -0
  98. wisent/core/contrastive_pairs/diagnostics/__init__.py +79 -0
  99. wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
  100. wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
  101. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +1655 -0
  102. wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
  103. wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
  104. wisent/core/contrastive_pairs/diagnostics/duplicates.py +118 -0
  105. wisent/core/contrastive_pairs/diagnostics/linearity.py +325 -0
  106. wisent/core/contrastive_pairs/diagnostics/vector_quality.py +620 -0
  107. wisent/core/contrastive_pairs/huggingface_pairs/__init__.py +1 -0
  108. wisent/core/contrastive_pairs/huggingface_pairs/atoms.py +255 -0
  109. wisent/core/contrastive_pairs/huggingface_pairs/hf_extractor_manifest.py +470 -0
  110. wisent/core/contrastive_pairs/huggingface_pairs/hf_extractor_registry.py +136 -0
  111. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/__init__.py +44 -0
  112. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentbench.py +225 -0
  113. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentharm.py +267 -0
  114. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentic_search.py +444 -0
  115. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aider_polyglot.py +225 -0
  116. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aime.py +118 -0
  117. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aime2024.py +74 -0
  118. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aime2025.py +73 -0
  119. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/alpaca_eval.py +153 -0
  120. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/apps.py +182 -0
  121. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/arena_hard.py +179 -0
  122. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/atis.py +89 -0
  123. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/babilong.py +96 -0
  124. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/bangla_mmlu.py +108 -0
  125. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/basqueglue.py +217 -0
  126. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/bec2016eu.py +99 -0
  127. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/bfcl.py +283 -0
  128. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/bhtc_v2.py +87 -0
  129. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/browsecomp.py +245 -0
  130. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/chain_of_thought.py +89 -0
  131. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/chinese_simpleqa.py +209 -0
  132. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/cluewsc.py +177 -0
  133. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/cnn_dailymail.py +92 -0
  134. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codeforces.py +378 -0
  135. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue.py +109 -0
  136. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text.py +15 -0
  137. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_go.py +64 -0
  138. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_java.py +65 -0
  139. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_javascript.py +65 -0
  140. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_php.py +65 -0
  141. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_python.py +65 -0
  142. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_ruby.py +65 -0
  143. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/coding_benchmarks.py +844 -0
  144. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/coedit_gec.py +79 -0
  145. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/conala.py +133 -0
  146. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/concode.py +111 -0
  147. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/dbpedia_14.py +91 -0
  148. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/doc_vqa.py +102 -0
  149. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/donotanswer.py +236 -0
  150. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/ds1000.py +129 -0
  151. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/ds_1000.py +155 -0
  152. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/epec_koref_bin.py +85 -0
  153. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/ethos_binary.py +82 -0
  154. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/evalita_mp.py +165 -0
  155. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/evalita_sp_sum_task_fp_small_p1.py +89 -0
  156. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/facts_grounding.py +181 -0
  157. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/faithbench.py +295 -0
  158. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/financial_tweets.py +100 -0
  159. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flames.py +270 -0
  160. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flan_held_in.py +98 -0
  161. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flores.py +572 -0
  162. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/frames.py +143 -0
  163. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/freebase.py +99 -0
  164. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/get_negative_example_livecodebench.py +146 -0
  165. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/get_positive_example_livecodebench.py +140 -0
  166. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/gpt3_translation_benchmarks.py +98 -0
  167. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hallucinations_leaderboard.py +389 -0
  168. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/halueval.py +246 -0
  169. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/harmbench.py +250 -0
  170. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/healthbench.py +181 -0
  171. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hle.py +106 -0
  172. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hmmt.py +117 -0
  173. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/humaneval.py +119 -0
  174. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/humanevalpack.py +102 -0
  175. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/instruct_humaneval.py +180 -0
  176. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/instructhumaneval.py +129 -0
  177. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/iwslt2017_ar_en.py +98 -0
  178. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/iwslt2017_en_ar.py +98 -0
  179. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/jailbreakbench.py +258 -0
  180. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/law_stack_exchange.py +101 -0
  181. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/ledgar.py +118 -0
  182. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livecodebench.py +61 -0
  183. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livecodebench_contrastive_pair_generator.py +491 -0
  184. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livecodebench_v6.py +263 -0
  185. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livemathbench.py +230 -0
  186. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/llama.py +96 -0
  187. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/longform_writing.py +285 -0
  188. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/m_mmlu.py +96 -0
  189. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/math.py +186 -0
  190. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/math500.py +146 -0
  191. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mbpp.py +142 -0
  192. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/meddialog.py +79 -0
  193. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/medical_abstracts.py +101 -0
  194. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/medium_priority_benchmarks.py +787 -0
  195. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mercury.py +111 -0
  196. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mmlu_redux.py +194 -0
  197. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mmlusr.py +108 -0
  198. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multimedqa.py +99 -0
  199. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multipl_e.py +109 -0
  200. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple.py +96 -0
  201. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_choice.py +87 -0
  202. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_cpp.py +128 -0
  203. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_go.py +128 -0
  204. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_java.py +128 -0
  205. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_js.py +128 -0
  206. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_py.py +15 -0
  207. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/multiple_rs.py +128 -0
  208. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/non_greedy_robustness_agieval_aqua_rat.py +92 -0
  209. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/olympiadbench.py +287 -0
  210. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/openllm.py +99 -0
  211. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/option_order_robustness_agieval_aqua_rat.py +92 -0
  212. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/or_bench.py +300 -0
  213. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/penn_treebank.py +80 -0
  214. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/planbench.py +317 -0
  215. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/polymath.py +467 -0
  216. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/prompt_robustness_agieval_aqua_rat.py +92 -0
  217. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/pythia.py +99 -0
  218. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/recode.py +131 -0
  219. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/refusalbench.py +280 -0
  220. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/scicode.py +275 -0
  221. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/self_consistency.py +90 -0
  222. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/simpleqa.py +145 -0
  223. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/sorry_bench.py +211 -0
  224. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/stsb.py +79 -0
  225. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/super_glue_lm_eval_v1.py +99 -0
  226. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/super_glue_lm_eval_v1_seq2seq.py +98 -0
  227. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/super_glue_t5_prompt.py +123 -0
  228. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/super_gpqa.py +106 -0
  229. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/swe_bench.py +428 -0
  230. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/swe_bench_verified.py +158 -0
  231. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/sycophancy_eval.py +205 -0
  232. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/t0_eval.py +79 -0
  233. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tag.py +98 -0
  234. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tau_bench.py +305 -0
  235. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tmlu.py +109 -0
  236. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolbench.py +360 -0
  237. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolemu.py +386 -0
  238. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/travelplanner.py +286 -0
  239. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/truthfulqa_generation.py +128 -0
  240. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/unfair_tos.py +83 -0
  241. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/vaxx_stance.py +86 -0
  242. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wiceu.py +85 -0
  243. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wikitext103.py +97 -0
  244. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wildguard.py +280 -0
  245. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt14_en_fr.py +97 -0
  246. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt14_fr_en.py +97 -0
  247. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt16_de_en.py +90 -0
  248. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt16_en_de.py +90 -0
  249. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt16_en_ro.py +90 -0
  250. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt16_ro_en.py +90 -0
  251. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/wmt_ro_en_t5_prompt.py +90 -0
  252. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/xsum.py +81 -0
  253. wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
  254. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +265 -0
  255. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/__init__.py +472 -0
  256. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/aclue.py +24 -0
  257. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/acp.py +33 -0
  258. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/acpbench.py +39 -0
  259. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/advanced_ai_risk.py +59 -0
  260. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/aexams.py +14 -0
  261. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrimgsm.py +10 -0
  262. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrimmlu.py +10 -0
  263. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrixnli.py +9 -0
  264. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench.py +14 -0
  265. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_adr.py +9 -0
  266. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_afriqa.py +9 -0
  267. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_afrisenti.py +9 -0
  268. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_belebele.py +9 -0
  269. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_flores.py +9 -0
  270. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_injongointent.py +9 -0
  271. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_mafand.py +9 -0
  272. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_masakhaner.py +9 -0
  273. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_masakhanews.py +9 -0
  274. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_masakhapos.py +9 -0
  275. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_naijarc.py +9 -0
  276. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_nollysenti.py +9 -0
  277. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_ntrex.py +9 -0
  278. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_openai_mmlu.py +9 -0
  279. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_salt.py +9 -0
  280. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_sib.py +9 -0
  281. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_uhura_arc_easy.py +9 -0
  282. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/afrobench_xlsum.py +9 -0
  283. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/agieval.py +33 -0
  284. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/anli.py +9 -0
  285. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arab_culture.py +24 -0
  286. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabic_leaderboard_acva.py +67 -0
  287. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabic_leaderboard_acva_light.py +67 -0
  288. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabic_leaderboard_complete.py +24 -0
  289. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabic_leaderboard_light.py +81 -0
  290. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arabicmmlu.py +59 -0
  291. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/aradice.py +36 -0
  292. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arc.py +61 -0
  293. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/arithmetic.py +19 -0
  294. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/basque_bench.py +37 -0
  295. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/bbh.py +121 -0
  296. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/bbq.py +9 -0
  297. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/belebele.py +293 -0
  298. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/bertaqa.py +25 -0
  299. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/bigbench.py +300 -0
  300. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/blimp.py +76 -0
  301. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/careqa.py +9 -0
  302. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/catalan_bench.py +43 -0
  303. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/ceval_valid.py +61 -0
  304. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/cmmlu.py +76 -0
  305. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/code_x_glue.py +16 -0
  306. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/copal_id.py +11 -0
  307. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/crows_pairs.py +31 -0
  308. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/csatqa.py +15 -0
  309. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/darija.py +29 -0
  310. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/darijammlu.py +57 -0
  311. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/egymmlu.py +62 -0
  312. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/eus.py +76 -0
  313. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/evalita_mp.py +93 -0
  314. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/fld.py +9 -0
  315. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/flores.py +466 -0
  316. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/freebase.py +9 -0
  317. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/french_bench.py +23 -0
  318. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/galician_bench.py +41 -0
  319. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/glianorex.py +11 -0
  320. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/global_mmlu.py +115 -0
  321. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/gpqa.py +27 -0
  322. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/gsm8k.py +9 -0
  323. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/gsm8k_platinum.py +9 -0
  324. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/haerae.py +14 -0
  325. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/headqa.py +11 -0
  326. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/hellaswag.py +39 -0
  327. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/hendrycks_ethics.py +14 -0
  328. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/hendrycks_math.py +9 -0
  329. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/hrm8k.py +20 -0
  330. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/inverse.py +22 -0
  331. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/japanese_leaderboard.py +20 -0
  332. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/jsonschema_bench.py +9 -0
  333. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/kbl.py +85 -0
  334. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/kmmlu.py +281 -0
  335. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/kobest.py +14 -0
  336. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/kormedmcqa.py +9 -0
  337. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/lambada.py +28 -0
  338. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/leaderboard.py +52 -0
  339. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/libra.py +9 -0
  340. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/lingoly.py +11 -0
  341. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/longbench.py +9 -0
  342. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/m.py +43 -0
  343. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mastermind.py +9 -0
  344. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mathqa.py +9 -0
  345. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/med.py +24 -0
  346. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/meddialog.py +12 -0
  347. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/medqa.py +9 -0
  348. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mela.py +18 -0
  349. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/metabench.py +36 -0
  350. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mgsm.py +44 -0
  351. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/minerva_math.py +16 -0
  352. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mlqa.py +58 -0
  353. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlu.py +70 -0
  354. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlu_pro.py +23 -0
  355. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlu_pro_plus.py +23 -0
  356. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlu_prox.py +191 -0
  357. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmlusr.py +9 -0
  358. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/mmmu.py +46 -0
  359. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/model_written_evals.py +9 -0
  360. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/multiblimp.py +111 -0
  361. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/non.py +23 -0
  362. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/noreval.py +143 -0
  363. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/noridiom.py +20 -0
  364. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/nortruthfulqa.py +32 -0
  365. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/nrk.py +20 -0
  366. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi.py +9 -0
  367. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi_arc_multilingual.py +10 -0
  368. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi_hellaswag_multilingual.py +24 -0
  369. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi_mmlu_multilingual.py +24 -0
  370. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/okapi_truthfulqa_multilingual.py +34 -0
  371. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/paloma.py +25 -0
  372. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/pawsx.py +9 -0
  373. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/persona.py +144 -0
  374. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/pile.py +31 -0
  375. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/polemo2.py +9 -0
  376. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/portuguese_bench.py +31 -0
  377. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/prompt.py +23 -0
  378. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/qa4mre.py +12 -0
  379. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/qasper.py +11 -0
  380. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/ru.py +19 -0
  381. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/ruler.py +9 -0
  382. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/score.py +20 -0
  383. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/scrolls.py +9 -0
  384. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/self_consistency.py +11 -0
  385. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/spanish_bench.py +38 -0
  386. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/storycloze.py +9 -0
  387. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/super_glue_t5_prompt.py +17 -0
  388. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/tinyBenchmarks.py +9 -0
  389. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/tmlu.py +9 -0
  390. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/tmmluplus.py +80 -0
  391. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/translation.py +9 -0
  392. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/truthfulqa.py +76 -0
  393. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/truthfulqa_multi.py +24 -0
  394. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/turkishmmlu.py +30 -0
  395. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/unitxt.py +23 -0
  396. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/unscramble.py +9 -0
  397. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/winogender.py +16 -0
  398. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/wmdp.py +12 -0
  399. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/wmt14.py +16 -0
  400. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/wmt16.py +22 -0
  401. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/wsc273.py +9 -0
  402. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xcopa.py +21 -0
  403. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xnli.py +28 -0
  404. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xnli_eu.py +12 -0
  405. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xquad.py +22 -0
  406. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xstorycloze.py +22 -0
  407. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/xwinograd.py +15 -0
  408. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +478 -0
  409. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +140 -0
  410. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +125 -0
  411. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aclue.py +171 -0
  412. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench.py +207 -0
  413. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench_hard.py +185 -0
  414. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/advanced.py +130 -0
  415. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aexams.py +184 -0
  416. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrimgsm.py +98 -0
  417. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrimmlu.py +113 -0
  418. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrixnli.py +129 -0
  419. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrobench_cot.py +88 -0
  420. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrobench_mc.py +107 -0
  421. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ag.py +134 -0
  422. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/agieval.py +155 -0
  423. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ai2_arc.py +114 -0
  424. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/anagrams1.py +81 -0
  425. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/anagrams2.py +81 -0
  426. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/anli.py +140 -0
  427. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabculture.py +180 -0
  428. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic.py +98 -0
  429. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_exams.py +104 -0
  430. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_complete.py +168 -0
  431. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_light.py +168 -0
  432. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabicmmlu.py +167 -0
  433. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aradice.py +268 -0
  434. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc.py +133 -0
  435. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +118 -0
  436. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +118 -0
  437. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_gen.py +101 -0
  438. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_mc.py +106 -0
  439. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/argument.py +134 -0
  440. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +114 -0
  441. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +122 -0
  442. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/assin.py +103 -0
  443. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/babi.py +113 -0
  444. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench.py +155 -0
  445. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench_gen.py +168 -0
  446. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench_mc.py +139 -0
  447. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bbh.py +133 -0
  448. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bbq.py +169 -0
  449. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/belebele.py +181 -0
  450. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/benchmarks.py +155 -0
  451. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bertaqa.py +165 -0
  452. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhs.py +155 -0
  453. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhtc.py +143 -0
  454. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bigbench.py +170 -0
  455. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp.py +171 -0
  456. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp_nl.py +152 -0
  457. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +117 -0
  458. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq_seq2seq.py +117 -0
  459. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/c4.py +150 -0
  460. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cabbq.py +152 -0
  461. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cabreu.py +127 -0
  462. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/careqa.py +169 -0
  463. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench.py +155 -0
  464. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench_gen.py +119 -0
  465. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench_mc.py +113 -0
  466. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalanqa.py +171 -0
  467. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catcola.py +139 -0
  468. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +117 -0
  469. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval.py +223 -0
  470. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval_valid.py +163 -0
  471. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chain.py +110 -0
  472. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chartqa.py +238 -0
  473. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/claim.py +151 -0
  474. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/click.py +152 -0
  475. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cmmlu.py +166 -0
  476. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cnn.py +144 -0
  477. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cocoteros.py +148 -0
  478. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/code2text.py +161 -0
  479. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/code_x_glue.py +114 -0
  480. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/codexglue.py +107 -0
  481. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coedit.py +149 -0
  482. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cola.py +83 -0
  483. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense.py +107 -0
  484. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense_qa.py +127 -0
  485. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +124 -0
  486. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copal_id.py +169 -0
  487. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +162 -0
  488. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqcat.py +114 -0
  489. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/crows_pairs.py +158 -0
  490. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/csatqa.py +152 -0
  491. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cycle.py +107 -0
  492. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cycle_letters.py +81 -0
  493. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darija_bench.py +221 -0
  494. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijahellaswag.py +174 -0
  495. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijammlu.py +152 -0
  496. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/dbpedia.py +157 -0
  497. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/discrim_eval.py +152 -0
  498. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/doc.py +107 -0
  499. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +129 -0
  500. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/egyhellaswag.py +125 -0
  501. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/egymmlu.py +180 -0
  502. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/epec.py +142 -0
  503. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq.py +107 -0
  504. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench.py +194 -0
  505. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_ca.py +152 -0
  506. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_es.py +152 -0
  507. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/esbbq.py +152 -0
  508. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/escola.py +85 -0
  509. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ethics.py +135 -0
  510. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ethos.py +99 -0
  511. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus.py +107 -0
  512. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_exams.py +225 -0
  513. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_proficiency.py +159 -0
  514. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_reading.py +159 -0
  515. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_trivia.py +159 -0
  516. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_llm.py +166 -0
  517. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_sp.py +109 -0
  518. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/fda.py +105 -0
  519. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/financial.py +107 -0
  520. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/flan.py +114 -0
  521. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/fld.py +143 -0
  522. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench.py +202 -0
  523. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench_mc.py +98 -0
  524. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench_perplexity.py +86 -0
  525. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galcola.py +109 -0
  526. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench.py +155 -0
  527. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench_gen.py +118 -0
  528. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench_mc.py +112 -0
  529. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gaokao.py +141 -0
  530. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glianorex.py +118 -0
  531. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_mmlu.py +171 -0
  532. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_piqa.py +152 -0
  533. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glue.py +109 -0
  534. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpqa.py +161 -0
  535. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpt3.py +110 -0
  536. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/groundcocoa.py +184 -0
  537. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm.py +108 -0
  538. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +134 -0
  539. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/haerae.py +152 -0
  540. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +112 -0
  541. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +125 -0
  542. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_ethics.py +225 -0
  543. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_math.py +191 -0
  544. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/histoires_morales.py +179 -0
  545. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hle.py +111 -0
  546. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hrm8k.py +203 -0
  547. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval.py +124 -0
  548. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval_infilling.py +152 -0
  549. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/icelandic_winogrande.py +152 -0
  550. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ifeval.py +118 -0
  551. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse.py +107 -0
  552. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse_scaling.py +192 -0
  553. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/iwslt2017.py +117 -0
  554. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ja.py +107 -0
  555. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard.py +155 -0
  556. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_gen.py +224 -0
  557. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_mc.py +120 -0
  558. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/jsonschema_bench.py +123 -0
  559. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kbl.py +140 -0
  560. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu.py +168 -0
  561. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu_cot.py +88 -0
  562. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu_mc.py +107 -0
  563. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kobest.py +165 -0
  564. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kormedmcqa.py +160 -0
  565. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada.py +147 -0
  566. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_cloze.py +185 -0
  567. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual.py +185 -0
  568. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual_stablelm.py +141 -0
  569. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/law.py +107 -0
  570. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/leaderboard.py +194 -0
  571. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/libra.py +165 -0
  572. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lingoly.py +203 -0
  573. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livemathbench.py +155 -0
  574. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/llama3.py +152 -0
  575. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lm_syneval.py +152 -0
  576. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logieval.py +82 -0
  577. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +115 -0
  578. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +114 -0
  579. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbench.py +152 -0
  580. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbenchv2.py +152 -0
  581. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mastermind.py +203 -0
  582. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mathqa.py +137 -0
  583. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mbpp.py +123 -0
  584. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +115 -0
  585. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/med_concepts_qa.py +224 -0
  586. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meddialog.py +180 -0
  587. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medical.py +107 -0
  588. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mediqa_qa2019.py +123 -0
  589. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medmcqa.py +169 -0
  590. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +118 -0
  591. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medtext.py +108 -0
  592. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mela.py +96 -0
  593. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meqsum.py +115 -0
  594. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/metabench.py +154 -0
  595. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mgsm.py +122 -0
  596. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mimic_repsum.py +140 -0
  597. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/minerva_math.py +172 -0
  598. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mlqa.py +143 -0
  599. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu.py +144 -0
  600. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu_cot.py +88 -0
  601. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu_mc.py +107 -0
  602. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu_pro.py +145 -0
  603. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlusr.py +189 -0
  604. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmmu.py +150 -0
  605. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mnli.py +113 -0
  606. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/model_written_evals.py +115 -0
  607. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/moral_stories.py +151 -0
  608. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +111 -0
  609. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mts_dialog.py +118 -0
  610. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mts_dialog_perplexity.py +97 -0
  611. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multiblimp.py +134 -0
  612. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multilingual.py +106 -0
  613. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +114 -0
  614. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +113 -0
  615. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/non.py +107 -0
  616. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval.py +173 -0
  617. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_exact.py +157 -0
  618. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_gen.py +277 -0
  619. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_gen_exact.py +165 -0
  620. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc.py +228 -0
  621. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc_log_likelihoods.py +223 -0
  622. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noticia.py +105 -0
  623. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/nq_open.py +135 -0
  624. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi.py +27 -0
  625. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_arc_multilingual.py +167 -0
  626. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_hellaswag_multilingual.py +174 -0
  627. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_mmlu_multilingual.py +162 -0
  628. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_truthfulqa_multilingual.py +209 -0
  629. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/olaph.py +186 -0
  630. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/olaph_perplexity.py +97 -0
  631. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +118 -0
  632. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/option.py +107 -0
  633. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paloma.py +205 -0
  634. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafraseja.py +110 -0
  635. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafrases.py +110 -0
  636. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws.py +107 -0
  637. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws_x.py +154 -0
  638. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +115 -0
  639. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/persona.py +246 -0
  640. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/phrases.py +144 -0
  641. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/phrases_ca_va.py +82 -0
  642. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pile.py +161 -0
  643. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pile_10k.py +140 -0
  644. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +116 -0
  645. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/polemo2.py +135 -0
  646. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/polymath.py +155 -0
  647. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench.py +155 -0
  648. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench_gen.py +121 -0
  649. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench_mc.py +103 -0
  650. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prompt.py +107 -0
  651. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +115 -0
  652. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +112 -0
  653. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +119 -0
  654. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +118 -0
  655. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper_bool.py +112 -0
  656. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +111 -0
  657. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnlieu.py +107 -0
  658. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +111 -0
  659. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/quac.py +111 -0
  660. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +124 -0
  661. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/random.py +107 -0
  662. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/realtoxicityprompts.py +124 -0
  663. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +125 -0
  664. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/reversed.py +110 -0
  665. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +111 -0
  666. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ruler.py +170 -0
  667. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +113 -0
  668. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/score.py +177 -0
  669. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls.py +161 -0
  670. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls_mc.py +157 -0
  671. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/self.py +110 -0
  672. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue.py +131 -0
  673. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue_rte.py +119 -0
  674. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/simple_cooccurrence_bias.py +121 -0
  675. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/siqa.py +209 -0
  676. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +114 -0
  677. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench.py +155 -0
  678. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench_gen.py +117 -0
  679. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench_mc.py +110 -0
  680. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad2.py +129 -0
  681. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad_completion.py +121 -0
  682. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sst2.py +111 -0
  683. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/storycloze.py +250 -0
  684. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/summarization.py +107 -0
  685. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super.py +107 -0
  686. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super_glue.py +154 -0
  687. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/superglue.py +111 -0
  688. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/supergpqa.py +111 -0
  689. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +115 -0
  690. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swde.py +179 -0
  691. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sycophancy.py +117 -0
  692. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/t0.py +110 -0
  693. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/teca.py +110 -0
  694. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyarc.py +110 -0
  695. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinybenchmarks.py +155 -0
  696. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinygsm8k.py +110 -0
  697. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyhellaswag.py +110 -0
  698. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinymmlu.py +110 -0
  699. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinytruthfulqa.py +113 -0
  700. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinywinogrande.py +110 -0
  701. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tmmluplus.py +181 -0
  702. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/toxigen.py +91 -0
  703. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/translation.py +149 -0
  704. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +130 -0
  705. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa.py +112 -0
  706. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +120 -0
  707. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +140 -0
  708. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_multi.py +142 -0
  709. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turblimp_core.py +152 -0
  710. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu.py +161 -0
  711. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu_cot.py +104 -0
  712. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu_mc.py +102 -0
  713. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/twenty_newsgroups.py +111 -0
  714. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/unitxt.py +131 -0
  715. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/unscramble.py +155 -0
  716. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/vaxx.py +95 -0
  717. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +130 -0
  718. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +122 -0
  719. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wikitext.py +146 -0
  720. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogender.py +139 -0
  721. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +118 -0
  722. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmdp.py +155 -0
  723. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmt14.py +110 -0
  724. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmt16.py +118 -0
  725. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +114 -0
  726. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +117 -0
  727. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc273.py +180 -0
  728. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xcopa.py +197 -0
  729. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xlsum.py +147 -0
  730. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +131 -0
  731. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xquad.py +203 -0
  732. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +129 -0
  733. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +124 -0
  734. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/yahoo.py +108 -0
  735. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/zhoblimp.py +155 -0
  736. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +56 -0
  737. wisent/core/data_loaders/__init__.py +235 -0
  738. wisent/core/data_loaders/core/__init__.py +0 -0
  739. wisent/core/data_loaders/core/atoms.py +99 -0
  740. wisent/core/data_loaders/loaders/__init__.py +0 -0
  741. wisent/core/data_loaders/loaders/custom.py +120 -0
  742. wisent/core/data_loaders/loaders/huggingface_loader.py +153 -0
  743. wisent/core/data_loaders/loaders/lm_loader.py +494 -0
  744. wisent/core/data_loaders/loaders/lm_loader_special_cases.py +496 -0
  745. wisent/core/data_loaders/loaders/task_interface_loader.py +300 -0
  746. wisent/core/data_loaders/rotator.py +118 -0
  747. wisent/core/detection_handling.py +259 -0
  748. wisent/core/diversity_processors.py +193 -0
  749. wisent/core/download_full_benchmarks.py +1512 -0
  750. wisent/core/errors/__init__.py +203 -0
  751. wisent/core/errors/error_codes.py +763 -0
  752. wisent/core/errors/error_handler.py +134 -0
  753. wisent/core/evaluators/__init__.py +0 -0
  754. wisent/core/evaluators/benchmark_specific/__init__.py +42 -0
  755. wisent/core/evaluators/benchmark_specific/aime_evaluator.py +90 -0
  756. wisent/core/evaluators/benchmark_specific/coding/__init__.py +0 -0
  757. wisent/core/evaluators/benchmark_specific/coding/metrics/__init__.py +0 -0
  758. wisent/core/evaluators/benchmark_specific/coding/metrics/core/__init__.py +0 -0
  759. wisent/core/evaluators/benchmark_specific/coding/metrics/core/atoms.py +36 -0
  760. wisent/core/evaluators/benchmark_specific/coding/metrics/evaluator.py +363 -0
  761. wisent/core/evaluators/benchmark_specific/coding/metrics/passk.py +67 -0
  762. wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/__init__.py +0 -0
  763. wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/core/__init__.py +0 -0
  764. wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/core/atoms.py +27 -0
  765. wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/cpp_sanitizer.py +62 -0
  766. wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/java_sanitizer.py +78 -0
  767. wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/python_sanitizer.py +94 -0
  768. wisent/core/evaluators/benchmark_specific/coding/output_sanitizer/utils.py +126 -0
  769. wisent/core/evaluators/benchmark_specific/coding/providers/__init__.py +18 -0
  770. wisent/core/evaluators/benchmark_specific/coding/providers/core/__init__.py +0 -0
  771. wisent/core/evaluators/benchmark_specific/coding/providers/core/atoms.py +31 -0
  772. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/__init__.py +3 -0
  773. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/provider.py +305 -0
  774. wisent/core/evaluators/benchmark_specific/coding/safe_docker/Dockerfile +31 -0
  775. wisent/core/evaluators/benchmark_specific/coding/safe_docker/__init__.py +0 -0
  776. wisent/core/evaluators/benchmark_specific/coding/safe_docker/core/__init__.py +0 -0
  777. wisent/core/evaluators/benchmark_specific/coding/safe_docker/core/atoms.py +105 -0
  778. wisent/core/evaluators/benchmark_specific/coding/safe_docker/core/runtime.py +143 -0
  779. wisent/core/evaluators/benchmark_specific/coding/safe_docker/entrypoint.py +121 -0
  780. wisent/core/evaluators/benchmark_specific/coding/safe_docker/recipes.py +60 -0
  781. wisent/core/evaluators/benchmark_specific/coding/solution_generator.py +258 -0
  782. wisent/core/evaluators/benchmark_specific/conala_evaluator.py +332 -0
  783. wisent/core/evaluators/benchmark_specific/exact_match_evaluator.py +81 -0
  784. wisent/core/evaluators/benchmark_specific/f1_evaluator.py +173 -0
  785. wisent/core/evaluators/benchmark_specific/generation_evaluator.py +488 -0
  786. wisent/core/evaluators/benchmark_specific/livemathbench_evaluator.py +393 -0
  787. wisent/core/evaluators/benchmark_specific/log_likelihoods_evaluator.py +202 -0
  788. wisent/core/evaluators/benchmark_specific/math_evaluator.py +119 -0
  789. wisent/core/evaluators/benchmark_specific/math_parsing/__init__.py +1 -0
  790. wisent/core/evaluators/benchmark_specific/math_parsing/core.py +1640 -0
  791. wisent/core/evaluators/benchmark_specific/math_parsing/extract_boxed.py +48 -0
  792. wisent/core/evaluators/benchmark_specific/math_parsing/is_equiv.py +159 -0
  793. wisent/core/evaluators/benchmark_specific/math_parsing/scripts.py +919 -0
  794. wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py +175 -0
  795. wisent/core/evaluators/benchmark_specific/polymath_evaluator.py +114 -0
  796. wisent/core/evaluators/core/__init__.py +5 -0
  797. wisent/core/evaluators/core/atoms.py +166 -0
  798. wisent/core/evaluators/custom/__init__.py +20 -0
  799. wisent/core/evaluators/custom/custom_evaluator.py +382 -0
  800. wisent/core/evaluators/custom/examples/__init__.py +37 -0
  801. wisent/core/evaluators/custom/examples/desklib_detector.py +166 -0
  802. wisent/core/evaluators/custom/examples/gptzero.py +185 -0
  803. wisent/core/evaluators/custom/examples/humanization.py +79 -0
  804. wisent/core/evaluators/custom/examples/humanization_coherent.py +127 -0
  805. wisent/core/evaluators/custom/examples/roberta_detector.py +173 -0
  806. wisent/core/evaluators/oracles/__init__.py +0 -0
  807. wisent/core/evaluators/oracles/interactive.py +73 -0
  808. wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
  809. wisent/core/evaluators/oracles/truthfulqa_gen_evaluator.py +168 -0
  810. wisent/core/evaluators/oracles/user_specified.py +67 -0
  811. wisent/core/evaluators/personalization/__init__.py +12 -0
  812. wisent/core/evaluators/personalization/alignment.py +166 -0
  813. wisent/core/evaluators/personalization/coherence.py +325 -0
  814. wisent/core/evaluators/personalization/difference.py +73 -0
  815. wisent/core/evaluators/rotator.py +217 -0
  816. wisent/core/evaluators/steering_evaluators.py +386 -0
  817. wisent/core/evaluators/synthetic_evaluator.py +377 -0
  818. wisent/core/hyperparameter_optimizer.py +547 -0
  819. wisent/core/layer.py +17 -0
  820. wisent/core/lm_eval_harness_ground_truth.py +1431 -0
  821. wisent/core/main.py +101 -0
  822. wisent/core/managed_cached_benchmarks.py +609 -0
  823. wisent/core/mixed_benchmark_sampler.py +366 -0
  824. wisent/core/modalities/__init__.py +545 -0
  825. wisent/core/model_persistence.py +302 -0
  826. wisent/core/models/__init__.py +23 -0
  827. wisent/core/models/core/__init__.py +0 -0
  828. wisent/core/models/core/atoms.py +465 -0
  829. wisent/core/models/inference_config.py +127 -0
  830. wisent/core/models/wisent_model.py +893 -0
  831. wisent/core/multi_steering.py +397 -0
  832. wisent/core/opti/__init__.py +0 -0
  833. wisent/core/opti/core/__init__.py +0 -0
  834. wisent/core/opti/core/atoms.py +177 -0
  835. wisent/core/opti/methods/__init__.py +10 -0
  836. wisent/core/opti/methods/opti_classificator.py +172 -0
  837. wisent/core/opti/methods/opti_steering.py +139 -0
  838. wisent/core/opti/methods/opti_weights.py +523 -0
  839. wisent/core/optuna/__init__.py +54 -0
  840. wisent/core/optuna/classifier/__init__.py +25 -0
  841. wisent/core/optuna/classifier/activation_generator.py +351 -0
  842. wisent/core/optuna/classifier/classifier_cache.py +509 -0
  843. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +685 -0
  844. wisent/core/optuna/steering/__init__.py +20 -0
  845. wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +200 -0
  846. wisent/core/optuna/steering/data_utils.py +342 -0
  847. wisent/core/optuna/steering/metrics.py +412 -0
  848. wisent/core/optuna/steering/steering_optimization.py +1096 -0
  849. wisent/core/parser.py +1662 -0
  850. wisent/core/parser_arguments/__init__.py +10 -0
  851. wisent/core/parser_arguments/agent_parser.py +122 -0
  852. wisent/core/parser_arguments/check_linearity_parser.py +82 -0
  853. wisent/core/parser_arguments/configure_model_parser.py +7 -0
  854. wisent/core/parser_arguments/create_steering_vector_parser.py +67 -0
  855. wisent/core/parser_arguments/diagnose_pairs_parser.py +25 -0
  856. wisent/core/parser_arguments/diagnose_vectors_parser.py +72 -0
  857. wisent/core/parser_arguments/evaluate_parser.py +40 -0
  858. wisent/core/parser_arguments/evaluate_refusal_parser.py +32 -0
  859. wisent/core/parser_arguments/evaluate_responses_parser.py +12 -0
  860. wisent/core/parser_arguments/full_optimize_parser.py +194 -0
  861. wisent/core/parser_arguments/generate_pairs_from_task_parser.py +33 -0
  862. wisent/core/parser_arguments/generate_pairs_parser.py +43 -0
  863. wisent/core/parser_arguments/generate_responses_parser.py +16 -0
  864. wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +148 -0
  865. wisent/core/parser_arguments/generate_vector_from_task_parser.py +149 -0
  866. wisent/core/parser_arguments/generate_vector_parser.py +89 -0
  867. wisent/core/parser_arguments/get_activations_parser.py +90 -0
  868. wisent/core/parser_arguments/inference_config_parser.py +65 -0
  869. wisent/core/parser_arguments/main_parser.py +220 -0
  870. wisent/core/parser_arguments/model_config_parser.py +59 -0
  871. wisent/core/parser_arguments/modify_weights_parser.py +309 -0
  872. wisent/core/parser_arguments/monitor_parser.py +17 -0
  873. wisent/core/parser_arguments/multi_steer_parser.py +48 -0
  874. wisent/core/parser_arguments/nonsense_parser.py +26 -0
  875. wisent/core/parser_arguments/optimization_cache_parser.py +64 -0
  876. wisent/core/parser_arguments/optimize_classification_parser.py +108 -0
  877. wisent/core/parser_arguments/optimize_parser.py +142 -0
  878. wisent/core/parser_arguments/optimize_sample_size_parser.py +58 -0
  879. wisent/core/parser_arguments/optimize_steering_parser.py +617 -0
  880. wisent/core/parser_arguments/optimize_weights_parser.py +403 -0
  881. wisent/core/parser_arguments/synthetic_parser.py +117 -0
  882. wisent/core/parser_arguments/tasks_parser.py +591 -0
  883. wisent/core/parser_arguments/train_unified_goodness_parser.py +172 -0
  884. wisent/core/parser_arguments/utils.py +107 -0
  885. wisent/core/prompts/__init__.py +0 -0
  886. wisent/core/prompts/core/__init__.py +0 -0
  887. wisent/core/prompts/core/atom.py +57 -0
  888. wisent/core/prompts/core/prompt_formater.py +148 -0
  889. wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
  890. wisent/core/prompts/prompt_stratiegies/direct_completion.py +26 -0
  891. wisent/core/prompts/prompt_stratiegies/instruction_following.py +26 -0
  892. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +31 -0
  893. wisent/core/prompts/prompt_stratiegies/role_playing.py +33 -0
  894. wisent/core/representation.py +5 -0
  895. wisent/core/save_results.py +277 -0
  896. wisent/core/steering.py +660 -0
  897. wisent/core/steering_method.py +20 -0
  898. wisent/core/steering_methods/__init__.py +54 -0
  899. wisent/core/steering_methods/core/__init__.py +0 -0
  900. wisent/core/steering_methods/core/atoms.py +154 -0
  901. wisent/core/steering_methods/methods/__init__.py +0 -0
  902. wisent/core/steering_methods/methods/caa.py +45 -0
  903. wisent/core/steering_methods/methods/prism.py +588 -0
  904. wisent/core/steering_methods/methods/pulse.py +641 -0
  905. wisent/core/steering_methods/methods/titan.py +1005 -0
  906. wisent/core/steering_methods/preflight.py +322 -0
  907. wisent/core/steering_methods/registry.py +649 -0
  908. wisent/core/steering_methods/rotator.py +121 -0
  909. wisent/core/steering_optimizer.py +1503 -0
  910. wisent/core/synthetic/__init__.py +0 -0
  911. wisent/core/synthetic/cleaners/__init__.py +0 -0
  912. wisent/core/synthetic/cleaners/core/__init__.py +0 -0
  913. wisent/core/synthetic/cleaners/core/atoms.py +58 -0
  914. wisent/core/synthetic/cleaners/deduper_cleaner.py +53 -0
  915. wisent/core/synthetic/cleaners/methods/__init__.py +0 -0
  916. wisent/core/synthetic/cleaners/methods/base_dedupers.py +321 -0
  917. wisent/core/synthetic/cleaners/methods/base_refusalers.py +286 -0
  918. wisent/core/synthetic/cleaners/methods/core/__init__.py +0 -0
  919. wisent/core/synthetic/cleaners/methods/core/atoms.py +47 -0
  920. wisent/core/synthetic/cleaners/pairs_cleaner.py +90 -0
  921. wisent/core/synthetic/cleaners/refusaler_cleaner.py +133 -0
  922. wisent/core/synthetic/db_instructions/__init__.py +0 -0
  923. wisent/core/synthetic/db_instructions/core/__init__.py +0 -0
  924. wisent/core/synthetic/db_instructions/core/atoms.py +25 -0
  925. wisent/core/synthetic/db_instructions/mini_dp.py +115 -0
  926. wisent/core/synthetic/generators/__init__.py +0 -0
  927. wisent/core/synthetic/generators/core/__init__.py +0 -0
  928. wisent/core/synthetic/generators/core/atoms.py +73 -0
  929. wisent/core/synthetic/generators/diversities/__init__.py +0 -0
  930. wisent/core/synthetic/generators/diversities/core/__init__.py +0 -0
  931. wisent/core/synthetic/generators/diversities/core/core.py +68 -0
  932. wisent/core/synthetic/generators/diversities/methods/__init__.py +0 -0
  933. wisent/core/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
  934. wisent/core/synthetic/generators/nonsense_generator.py +150 -0
  935. wisent/core/synthetic/generators/pairs_generator.py +313 -0
  936. wisent/core/task_interface.py +143 -0
  937. wisent/core/task_selector.py +232 -0
  938. wisent/core/tasks/__init__.py +218 -0
  939. wisent/core/tasks/aime_task.py +142 -0
  940. wisent/core/tasks/file_task.py +212 -0
  941. wisent/core/tasks/hle_task.py +180 -0
  942. wisent/core/tasks/hmmt_task.py +120 -0
  943. wisent/core/tasks/livecodebench_task.py +94 -0
  944. wisent/core/tasks/livemathbench_task.py +159 -0
  945. wisent/core/tasks/lm_eval_task.py +611 -0
  946. wisent/core/tasks/math500_task.py +84 -0
  947. wisent/core/tasks/polymath_task.py +147 -0
  948. wisent/core/tasks/supergpqa_task.py +220 -0
  949. wisent/core/time_estimator.py +155 -0
  950. wisent/core/timing_calibration.py +176 -0
  951. wisent/core/tracking/__init__.py +54 -0
  952. wisent/core/tracking/latency.py +620 -0
  953. wisent/core/tracking/memory.py +360 -0
  954. wisent/core/trainers/__init__.py +0 -0
  955. wisent/core/trainers/core/__init__.py +11 -0
  956. wisent/core/trainers/core/atoms.py +45 -0
  957. wisent/core/trainers/steering_trainer.py +365 -0
  958. wisent/core/universal_subspace.py +918 -0
  959. wisent/core/user_model_config.py +158 -0
  960. wisent/core/utils/__init__.py +64 -0
  961. wisent/core/utils/base_rotator.py +292 -0
  962. wisent/core/utils/dataset_splits.py +197 -0
  963. wisent/core/utils/device.py +279 -0
  964. wisent/core/weight_modification/__init__.py +134 -0
  965. wisent/core/weight_modification/additive.py +340 -0
  966. wisent/core/weight_modification/directional.py +1357 -0
  967. wisent/core/weight_modification/export.py +359 -0
  968. wisent/core/weight_modification/multi_direction.py +410 -0
  969. wisent/core/weight_modification/utils.py +236 -0
  970. wisent/core/wisent.py +660 -0
  971. wisent/examples/contrastive_pairs/humanization_human_vs_ai.json +2112 -0
  972. wisent/examples/scripts/1/test_basqueglue_evaluation.json +51 -0
  973. wisent/examples/scripts/1/test_basqueglue_pairs.json +14 -0
  974. wisent/examples/scripts/1/test_bec2016eu_evaluation.json +51 -0
  975. wisent/examples/scripts/1/test_bec2016eu_pairs.json +14 -0
  976. wisent/examples/scripts/1/test_belebele_evaluation.json +51 -0
  977. wisent/examples/scripts/1/test_belebele_pairs.json +14 -0
  978. wisent/examples/scripts/1/test_benchmarks_evaluation.json +51 -0
  979. wisent/examples/scripts/1/test_benchmarks_pairs.json +14 -0
  980. wisent/examples/scripts/1/test_bertaqa_evaluation.json +51 -0
  981. wisent/examples/scripts/1/test_bertaqa_pairs.json +14 -0
  982. wisent/examples/scripts/1/test_bhtc_v2_evaluation.json +30 -0
  983. wisent/examples/scripts/1/test_bhtc_v2_pairs.json +8 -0
  984. wisent/examples/scripts/1/test_boolq-seq2seq_evaluation.json +30 -0
  985. wisent/examples/scripts/1/test_boolq-seq2seq_pairs.json +8 -0
  986. wisent/examples/scripts/1/test_cabreu_evaluation.json +30 -0
  987. wisent/examples/scripts/1/test_cabreu_pairs.json +8 -0
  988. wisent/examples/scripts/1/test_careqa_en_evaluation.json +30 -0
  989. wisent/examples/scripts/1/test_careqa_en_pairs.json +8 -0
  990. wisent/examples/scripts/1/test_careqa_evaluation.json +30 -0
  991. wisent/examples/scripts/1/test_careqa_pairs.json +8 -0
  992. wisent/examples/scripts/1/test_catalanqa_evaluation.json +30 -0
  993. wisent/examples/scripts/1/test_catalanqa_pairs.json +8 -0
  994. wisent/examples/scripts/1/test_catcola_evaluation.json +30 -0
  995. wisent/examples/scripts/1/test_catcola_pairs.json +8 -0
  996. wisent/examples/scripts/1/test_chartqa_evaluation.json +30 -0
  997. wisent/examples/scripts/1/test_chartqa_pairs.json +8 -0
  998. wisent/examples/scripts/1/test_claim_stance_topic_evaluation.json +30 -0
  999. wisent/examples/scripts/1/test_claim_stance_topic_pairs.json +8 -0
  1000. wisent/examples/scripts/1/test_cnn_dailymail_evaluation.json +30 -0
  1001. wisent/examples/scripts/1/test_cnn_dailymail_pairs.json +8 -0
  1002. wisent/examples/scripts/1/test_cocoteros_es_evaluation.json +30 -0
  1003. wisent/examples/scripts/1/test_cocoteros_es_pairs.json +8 -0
  1004. wisent/examples/scripts/1/test_coedit_gec_evaluation.json +30 -0
  1005. wisent/examples/scripts/1/test_coedit_gec_pairs.json +8 -0
  1006. wisent/examples/scripts/1/test_cola_evaluation.json +30 -0
  1007. wisent/examples/scripts/1/test_cola_pairs.json +8 -0
  1008. wisent/examples/scripts/1/test_coqcat_evaluation.json +30 -0
  1009. wisent/examples/scripts/1/test_coqcat_pairs.json +8 -0
  1010. wisent/examples/scripts/1/test_dbpedia_14_evaluation.json +30 -0
  1011. wisent/examples/scripts/1/test_dbpedia_14_pairs.json +8 -0
  1012. wisent/examples/scripts/1/test_epec_koref_bin_evaluation.json +30 -0
  1013. wisent/examples/scripts/1/test_epec_koref_bin_pairs.json +8 -0
  1014. wisent/examples/scripts/1/test_ethos_binary_evaluation.json +30 -0
  1015. wisent/examples/scripts/1/test_ethos_binary_pairs.json +8 -0
  1016. wisent/examples/scripts/2/test_afrimgsm_direct_amh_evaluation.json +30 -0
  1017. wisent/examples/scripts/2/test_afrimgsm_direct_amh_pairs.json +8 -0
  1018. wisent/examples/scripts/2/test_afrimmlu_direct_amh_evaluation.json +30 -0
  1019. wisent/examples/scripts/2/test_afrimmlu_direct_amh_pairs.json +8 -0
  1020. wisent/examples/scripts/2/test_afrixnli_en_direct_amh_evaluation.json +30 -0
  1021. wisent/examples/scripts/2/test_afrixnli_en_direct_amh_pairs.json +8 -0
  1022. wisent/examples/scripts/2/test_arc_ar_evaluation.json +30 -0
  1023. wisent/examples/scripts/2/test_arc_ar_pairs.json +8 -0
  1024. wisent/examples/scripts/2/test_atis_evaluation.json +30 -0
  1025. wisent/examples/scripts/2/test_atis_pairs.json +8 -0
  1026. wisent/examples/scripts/2/test_babi_evaluation.json +30 -0
  1027. wisent/examples/scripts/2/test_babi_pairs.json +8 -0
  1028. wisent/examples/scripts/2/test_babilong_evaluation.json +30 -0
  1029. wisent/examples/scripts/2/test_babilong_pairs.json +8 -0
  1030. wisent/examples/scripts/2/test_bangla_mmlu_evaluation.json +30 -0
  1031. wisent/examples/scripts/2/test_bangla_mmlu_pairs.json +8 -0
  1032. wisent/examples/scripts/2/test_basque-glue_pairs.json +14 -0
  1033. wisent/examples/scripts/benchmark_tags.json +2140 -0
  1034. wisent/examples/scripts/lm_eval_readme.json +4 -0
  1035. wisent/examples/scripts/results/benchmark_descriptions.json +1244 -0
  1036. wisent/examples/scripts/results/benchmark_evaluation_methods.json +66 -0
  1037. wisent/examples/scripts/results/benchmark_evaluator_mapping.json +2781 -0
  1038. wisent/examples/scripts/results/benchmark_evaluator_mapping_updated.json +30536 -0
  1039. wisent/examples/scripts/results/benchmark_evaluators_clean.json +469 -0
  1040. wisent/examples/scripts/results/benchmark_methods_summary.json +260 -0
  1041. wisent/examples/scripts/results/benchmark_pair_creation_methods.json +66 -0
  1042. wisent/examples/scripts/results/benchmark_pair_totals.json +269 -0
  1043. wisent/examples/scripts/results/benchmark_tags.json +917 -0
  1044. wisent/examples/scripts/results/benchmark_test_summary_nov4.json +71 -0
  1045. wisent/examples/scripts/results/coding_benchmarks_test_code_status.json +150 -0
  1046. wisent/examples/scripts/results/failing_benchmarks.json +946 -0
  1047. wisent/examples/scripts/results/failing_benchmarks_list.json +41 -0
  1048. wisent/examples/scripts/results/failing_benchmarks_test_results.json +945 -0
  1049. wisent/examples/scripts/results/missing_benchmark_tags.json +341 -0
  1050. wisent/examples/scripts/results/test_20_newsgroups_evaluation.json +30 -0
  1051. wisent/examples/scripts/results/test_20_newsgroups_pairs.json +8 -0
  1052. wisent/examples/scripts/results/test_AraDICE_evaluation.json +51 -0
  1053. wisent/examples/scripts/results/test_AraDICE_pairs.json +14 -0
  1054. wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_evaluation.json +30 -0
  1055. wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_pairs.json +8 -0
  1056. wisent/examples/scripts/results/test_ArabCulture_evaluation.json +51 -0
  1057. wisent/examples/scripts/results/test_ArabCulture_pairs.json +14 -0
  1058. wisent/examples/scripts/results/test_Tag_evaluation.json +30 -0
  1059. wisent/examples/scripts/results/test_Tag_pairs.json +8 -0
  1060. wisent/examples/scripts/results/test_aclue_evaluation.json +51 -0
  1061. wisent/examples/scripts/results/test_aclue_pairs.json +14 -0
  1062. wisent/examples/scripts/results/test_acp_bench_evaluation.json +51 -0
  1063. wisent/examples/scripts/results/test_acp_bench_hard_evaluation.json +51 -0
  1064. wisent/examples/scripts/results/test_acp_bench_hard_pairs.json +14 -0
  1065. wisent/examples/scripts/results/test_acp_bench_pairs.json +14 -0
  1066. wisent/examples/scripts/results/test_advanced_ai_risk_evaluation.json +51 -0
  1067. wisent/examples/scripts/results/test_advanced_ai_risk_pairs.json +14 -0
  1068. wisent/examples/scripts/results/test_aexams_evaluation.json +51 -0
  1069. wisent/examples/scripts/results/test_aexams_pairs.json +14 -0
  1070. wisent/examples/scripts/results/test_afrimgsm_direct_amh_evaluation.json +30 -0
  1071. wisent/examples/scripts/results/test_afrimgsm_direct_amh_pairs.json +8 -0
  1072. wisent/examples/scripts/results/test_afrimmlu_direct_amh_evaluation.json +30 -0
  1073. wisent/examples/scripts/results/test_afrimmlu_direct_amh_pairs.json +8 -0
  1074. wisent/examples/scripts/results/test_afrixnli_en_direct_amh_evaluation.json +30 -0
  1075. wisent/examples/scripts/results/test_afrixnli_en_direct_amh_pairs.json +8 -0
  1076. wisent/examples/scripts/results/test_ag_news_evaluation.json +30 -0
  1077. wisent/examples/scripts/results/test_ag_news_pairs.json +8 -0
  1078. wisent/examples/scripts/results/test_agieval_evaluation.json +51 -0
  1079. wisent/examples/scripts/results/test_agieval_pairs.json +14 -0
  1080. wisent/examples/scripts/results/test_aime2024_evaluation.json +30 -0
  1081. wisent/examples/scripts/results/test_aime2024_pairs.json +8 -0
  1082. wisent/examples/scripts/results/test_aime2025_evaluation.json +30 -0
  1083. wisent/examples/scripts/results/test_aime2025_pairs.json +8 -0
  1084. wisent/examples/scripts/results/test_aime_evaluation.json +30 -0
  1085. wisent/examples/scripts/results/test_aime_pairs.json +8 -0
  1086. wisent/examples/scripts/results/test_anagrams1_evaluation.json +30 -0
  1087. wisent/examples/scripts/results/test_anagrams1_pairs.json +8 -0
  1088. wisent/examples/scripts/results/test_anagrams2_evaluation.json +30 -0
  1089. wisent/examples/scripts/results/test_anagrams2_pairs.json +8 -0
  1090. wisent/examples/scripts/results/test_anli_evaluation.json +30 -0
  1091. wisent/examples/scripts/results/test_anli_pairs.json +8 -0
  1092. wisent/examples/scripts/results/test_apps_evaluation.json +30 -0
  1093. wisent/examples/scripts/results/test_apps_pairs.json +8 -0
  1094. wisent/examples/scripts/results/test_arabic_exams_evaluation.json +30 -0
  1095. wisent/examples/scripts/results/test_arabic_exams_pairs.json +8 -0
  1096. wisent/examples/scripts/results/test_arabic_leaderboard_complete_evaluation.json +51 -0
  1097. wisent/examples/scripts/results/test_arabic_leaderboard_complete_pairs.json +14 -0
  1098. wisent/examples/scripts/results/test_arabic_leaderboard_light_evaluation.json +51 -0
  1099. wisent/examples/scripts/results/test_arabic_leaderboard_light_pairs.json +14 -0
  1100. wisent/examples/scripts/results/test_arabicmmlu_evaluation.json +51 -0
  1101. wisent/examples/scripts/results/test_arabicmmlu_pairs.json +14 -0
  1102. wisent/examples/scripts/results/test_aradice/test_aradice_evaluation.json +51 -0
  1103. wisent/examples/scripts/results/test_aradice/test_aradice_pairs.json +14 -0
  1104. wisent/examples/scripts/results/test_aradice3/test_aradice_evaluation.json +51 -0
  1105. wisent/examples/scripts/results/test_aradice3/test_aradice_pairs.json +14 -0
  1106. wisent/examples/scripts/results/test_arc_ar_evaluation.json +30 -0
  1107. wisent/examples/scripts/results/test_arc_ar_pairs.json +8 -0
  1108. wisent/examples/scripts/results/test_arc_challenge_evaluation.json +30 -0
  1109. wisent/examples/scripts/results/test_arc_challenge_pairs.json +8 -0
  1110. wisent/examples/scripts/results/test_arc_easy_evaluation.json +30 -0
  1111. wisent/examples/scripts/results/test_arc_easy_pairs.json +8 -0
  1112. wisent/examples/scripts/results/test_argument_topic_evaluation.json +30 -0
  1113. wisent/examples/scripts/results/test_argument_topic_pairs.json +8 -0
  1114. wisent/examples/scripts/results/test_arithmetic_evaluation.json +51 -0
  1115. wisent/examples/scripts/results/test_arithmetic_pairs.json +14 -0
  1116. wisent/examples/scripts/results/test_asdiv_evaluation.json +30 -0
  1117. wisent/examples/scripts/results/test_asdiv_pairs.json +8 -0
  1118. wisent/examples/scripts/results/test_assin_entailment_evaluation.json +30 -0
  1119. wisent/examples/scripts/results/test_assin_entailment_pairs.json +8 -0
  1120. wisent/examples/scripts/results/test_atis_evaluation.json +30 -0
  1121. wisent/examples/scripts/results/test_atis_pairs.json +8 -0
  1122. wisent/examples/scripts/results/test_babi_evaluation.json +30 -0
  1123. wisent/examples/scripts/results/test_babi_pairs.json +8 -0
  1124. wisent/examples/scripts/results/test_babilong_evaluation.json +30 -0
  1125. wisent/examples/scripts/results/test_babilong_pairs.json +8 -0
  1126. wisent/examples/scripts/results/test_bangla_mmlu_evaluation.json +30 -0
  1127. wisent/examples/scripts/results/test_bangla_mmlu_pairs.json +8 -0
  1128. wisent/examples/scripts/results/test_banking77_evaluation.json +30 -0
  1129. wisent/examples/scripts/results/test_banking77_pairs.json +8 -0
  1130. wisent/examples/scripts/results/test_basque/test_basque-glue_pairs.json +14 -0
  1131. wisent/examples/scripts/results/test_basque-glue_evaluation.json +51 -0
  1132. wisent/examples/scripts/results/test_basque-glue_pairs.json +14 -0
  1133. wisent/examples/scripts/results/test_basque2/test_basque-glue_evaluation.json +51 -0
  1134. wisent/examples/scripts/results/test_basque2/test_basque-glue_pairs.json +14 -0
  1135. wisent/examples/scripts/results/test_basque_bench_evaluation.json +51 -0
  1136. wisent/examples/scripts/results/test_basque_bench_pairs.json +14 -0
  1137. wisent/examples/scripts/results/test_basque_glue/test_basque-glue_evaluation.json +51 -0
  1138. wisent/examples/scripts/results/test_basque_glue/test_basque-glue_pairs.json +14 -0
  1139. wisent/examples/scripts/results/test_basqueglue_evaluation.json +51 -0
  1140. wisent/examples/scripts/results/test_basqueglue_pairs.json +14 -0
  1141. wisent/examples/scripts/results/test_bbh_evaluation.json +51 -0
  1142. wisent/examples/scripts/results/test_bbh_pairs.json +14 -0
  1143. wisent/examples/scripts/results/test_bbq_evaluation.json +30 -0
  1144. wisent/examples/scripts/results/test_bbq_pairs.json +8 -0
  1145. wisent/examples/scripts/results/test_bec2016eu_evaluation.json +51 -0
  1146. wisent/examples/scripts/results/test_bec2016eu_pairs.json +14 -0
  1147. wisent/examples/scripts/results/test_belebele_evaluation.json +51 -0
  1148. wisent/examples/scripts/results/test_belebele_pairs.json +14 -0
  1149. wisent/examples/scripts/results/test_benchmarks_evaluation.json +51 -0
  1150. wisent/examples/scripts/results/test_benchmarks_pairs.json +14 -0
  1151. wisent/examples/scripts/results/test_bertaqa_evaluation.json +51 -0
  1152. wisent/examples/scripts/results/test_bertaqa_pairs.json +14 -0
  1153. wisent/examples/scripts/results/test_bhtc_v2_evaluation.json +30 -0
  1154. wisent/examples/scripts/results/test_bhtc_v2_pairs.json +8 -0
  1155. wisent/examples/scripts/results/test_bigbench_evaluation.json +51 -0
  1156. wisent/examples/scripts/results/test_bigbench_pairs.json +14 -0
  1157. wisent/examples/scripts/results/test_blimp_evaluation.json +51 -0
  1158. wisent/examples/scripts/results/test_blimp_pairs.json +14 -0
  1159. wisent/examples/scripts/results/test_boolq/test_boolq_evaluation.json +30 -0
  1160. wisent/examples/scripts/results/test_boolq/test_boolq_pairs.json +8 -0
  1161. wisent/examples/scripts/results/test_boolq-seq2seq_evaluation.json +30 -0
  1162. wisent/examples/scripts/results/test_boolq-seq2seq_pairs.json +8 -0
  1163. wisent/examples/scripts/results/test_boolq_evaluation.json +30 -0
  1164. wisent/examples/scripts/results/test_boolq_pairs.json +8 -0
  1165. wisent/examples/scripts/results/test_c4_evaluation.json +30 -0
  1166. wisent/examples/scripts/results/test_c4_pairs.json +8 -0
  1167. wisent/examples/scripts/results/test_cabreu_evaluation.json +30 -0
  1168. wisent/examples/scripts/results/test_cabreu_pairs.json +8 -0
  1169. wisent/examples/scripts/results/test_careqa_evaluation.json +30 -0
  1170. wisent/examples/scripts/results/test_careqa_pairs.json +8 -0
  1171. wisent/examples/scripts/results/test_catalan_bench_evaluation.json +51 -0
  1172. wisent/examples/scripts/results/test_catalan_bench_pairs.json +14 -0
  1173. wisent/examples/scripts/results/test_catalanqa_evaluation.json +30 -0
  1174. wisent/examples/scripts/results/test_catalanqa_pairs.json +8 -0
  1175. wisent/examples/scripts/results/test_catcola_evaluation.json +30 -0
  1176. wisent/examples/scripts/results/test_catcola_pairs.json +8 -0
  1177. wisent/examples/scripts/results/test_cb_evaluation.json +30 -0
  1178. wisent/examples/scripts/results/test_cb_pairs.json +8 -0
  1179. wisent/examples/scripts/results/test_ceval/test_ceval_evaluation.json +51 -0
  1180. wisent/examples/scripts/results/test_ceval/test_ceval_pairs.json +14 -0
  1181. wisent/examples/scripts/results/test_ceval_accountant/test_ceval-valid_accountant_evaluation.json +30 -0
  1182. wisent/examples/scripts/results/test_ceval_accountant/test_ceval-valid_accountant_pairs.json +8 -0
  1183. wisent/examples/scripts/results/test_ceval_evaluation.json +51 -0
  1184. wisent/examples/scripts/results/test_ceval_pairs.json +14 -0
  1185. wisent/examples/scripts/results/test_ceval_valid/test_ceval_valid_evaluation.json +51 -0
  1186. wisent/examples/scripts/results/test_ceval_valid/test_ceval_valid_pairs.json +14 -0
  1187. wisent/examples/scripts/results/test_chain_of_thought_evaluation.json +51 -0
  1188. wisent/examples/scripts/results/test_chain_of_thought_pairs.json +14 -0
  1189. wisent/examples/scripts/results/test_chartqa_evaluation.json +30 -0
  1190. wisent/examples/scripts/results/test_chartqa_pairs.json +8 -0
  1191. wisent/examples/scripts/results/test_claim_stance_topic_evaluation.json +30 -0
  1192. wisent/examples/scripts/results/test_claim_stance_topic_pairs.json +8 -0
  1193. wisent/examples/scripts/results/test_cmmlu_evaluation.json +51 -0
  1194. wisent/examples/scripts/results/test_cmmlu_pairs.json +14 -0
  1195. wisent/examples/scripts/results/test_cnn_dailymail_evaluation.json +30 -0
  1196. wisent/examples/scripts/results/test_cnn_dailymail_pairs.json +8 -0
  1197. wisent/examples/scripts/results/test_cocoteros_es_evaluation.json +30 -0
  1198. wisent/examples/scripts/results/test_cocoteros_es_pairs.json +8 -0
  1199. wisent/examples/scripts/results/test_codexglue_code_to_text_go_evaluation.json +30 -0
  1200. wisent/examples/scripts/results/test_codexglue_code_to_text_go_pairs.json +8 -0
  1201. wisent/examples/scripts/results/test_codexglue_code_to_text_java_evaluation.json +30 -0
  1202. wisent/examples/scripts/results/test_codexglue_code_to_text_java_pairs.json +8 -0
  1203. wisent/examples/scripts/results/test_codexglue_code_to_text_javascript_evaluation.json +30 -0
  1204. wisent/examples/scripts/results/test_codexglue_code_to_text_javascript_pairs.json +8 -0
  1205. wisent/examples/scripts/results/test_codexglue_code_to_text_php_evaluation.json +30 -0
  1206. wisent/examples/scripts/results/test_codexglue_code_to_text_php_pairs.json +8 -0
  1207. wisent/examples/scripts/results/test_codexglue_code_to_text_python_evaluation.json +30 -0
  1208. wisent/examples/scripts/results/test_codexglue_code_to_text_python_pairs.json +8 -0
  1209. wisent/examples/scripts/results/test_codexglue_code_to_text_ruby_evaluation.json +30 -0
  1210. wisent/examples/scripts/results/test_codexglue_code_to_text_ruby_pairs.json +8 -0
  1211. wisent/examples/scripts/results/test_coedit_gec_evaluation.json +30 -0
  1212. wisent/examples/scripts/results/test_coedit_gec_pairs.json +8 -0
  1213. wisent/examples/scripts/results/test_cola_evaluation.json +30 -0
  1214. wisent/examples/scripts/results/test_cola_pairs.json +8 -0
  1215. wisent/examples/scripts/results/test_commonsense_qa_evaluation.json +30 -0
  1216. wisent/examples/scripts/results/test_commonsense_qa_pairs.json +8 -0
  1217. wisent/examples/scripts/results/test_conala_evaluation.json +30 -0
  1218. wisent/examples/scripts/results/test_conala_pairs.json +8 -0
  1219. wisent/examples/scripts/results/test_concode_evaluation.json +30 -0
  1220. wisent/examples/scripts/results/test_concode_pairs.json +8 -0
  1221. wisent/examples/scripts/results/test_copa_evaluation.json +30 -0
  1222. wisent/examples/scripts/results/test_copa_pairs.json +8 -0
  1223. wisent/examples/scripts/results/test_copal_id_evaluation.json +30 -0
  1224. wisent/examples/scripts/results/test_copal_id_pairs.json +8 -0
  1225. wisent/examples/scripts/results/test_coqa_evaluation.json +30 -0
  1226. wisent/examples/scripts/results/test_coqa_pairs.json +8 -0
  1227. wisent/examples/scripts/results/test_coqcat_evaluation.json +30 -0
  1228. wisent/examples/scripts/results/test_coqcat_pairs.json +8 -0
  1229. wisent/examples/scripts/results/test_crows_pairs_evaluation.json +51 -0
  1230. wisent/examples/scripts/results/test_crows_pairs_pairs.json +14 -0
  1231. wisent/examples/scripts/results/test_csatqa_evaluation.json +51 -0
  1232. wisent/examples/scripts/results/test_csatqa_pairs.json +14 -0
  1233. wisent/examples/scripts/results/test_cycle_letters_evaluation.json +30 -0
  1234. wisent/examples/scripts/results/test_cycle_letters_pairs.json +8 -0
  1235. wisent/examples/scripts/results/test_darija_bench/test_darija_bench_evaluation.json +51 -0
  1236. wisent/examples/scripts/results/test_darija_bench/test_darija_bench_pairs.json +14 -0
  1237. wisent/examples/scripts/results/test_darija_bench_evaluation.json +51 -0
  1238. wisent/examples/scripts/results/test_darija_bench_pairs.json +14 -0
  1239. wisent/examples/scripts/results/test_darijahellaswag_evaluation.json +30 -0
  1240. wisent/examples/scripts/results/test_darijahellaswag_pairs.json +8 -0
  1241. wisent/examples/scripts/results/test_darijammlu_evaluation.json +51 -0
  1242. wisent/examples/scripts/results/test_darijammlu_pairs.json +14 -0
  1243. wisent/examples/scripts/results/test_dbpedia_14_evaluation.json +30 -0
  1244. wisent/examples/scripts/results/test_dbpedia_14_pairs.json +8 -0
  1245. wisent/examples/scripts/results/test_drop_evaluation.json +30 -0
  1246. wisent/examples/scripts/results/test_drop_pairs.json +8 -0
  1247. wisent/examples/scripts/results/test_ds1000_evaluation.json +30 -0
  1248. wisent/examples/scripts/results/test_ds1000_pairs.json +8 -0
  1249. wisent/examples/scripts/results/test_egyhellaswag_evaluation.json +30 -0
  1250. wisent/examples/scripts/results/test_egyhellaswag_pairs.json +8 -0
  1251. wisent/examples/scripts/results/test_egymmlu_evaluation.json +51 -0
  1252. wisent/examples/scripts/results/test_egymmlu_pairs.json +14 -0
  1253. wisent/examples/scripts/results/test_epec_koref_bin_evaluation.json +30 -0
  1254. wisent/examples/scripts/results/test_epec_koref_bin_pairs.json +8 -0
  1255. wisent/examples/scripts/results/test_eq_bench_evaluation.json +30 -0
  1256. wisent/examples/scripts/results/test_eq_bench_pairs.json +8 -0
  1257. wisent/examples/scripts/results/test_escola_evaluation.json +30 -0
  1258. wisent/examples/scripts/results/test_escola_pairs.json +8 -0
  1259. wisent/examples/scripts/results/test_ethics_cm_evaluation.json +30 -0
  1260. wisent/examples/scripts/results/test_ethics_cm_pairs.json +8 -0
  1261. wisent/examples/scripts/results/test_ethos_binary_evaluation.json +30 -0
  1262. wisent/examples/scripts/results/test_ethos_binary_pairs.json +8 -0
  1263. wisent/examples/scripts/results/test_eus_exams/test_eus_exams_evaluation.json +51 -0
  1264. wisent/examples/scripts/results/test_eus_exams/test_eus_exams_pairs.json +14 -0
  1265. wisent/examples/scripts/results/test_eus_exams_es_evaluation.json +51 -0
  1266. wisent/examples/scripts/results/test_eus_exams_es_pairs.json +14 -0
  1267. wisent/examples/scripts/results/test_eus_exams_evaluation.json +51 -0
  1268. wisent/examples/scripts/results/test_eus_exams_pairs.json +14 -0
  1269. wisent/examples/scripts/results/test_eus_proficiency_evaluation.json +30 -0
  1270. wisent/examples/scripts/results/test_eus_proficiency_pairs.json +8 -0
  1271. wisent/examples/scripts/results/test_eus_reading_evaluation.json +30 -0
  1272. wisent/examples/scripts/results/test_eus_reading_pairs.json +8 -0
  1273. wisent/examples/scripts/results/test_eus_trivia_evaluation.json +30 -0
  1274. wisent/examples/scripts/results/test_eus_trivia_pairs.json +8 -0
  1275. wisent/examples/scripts/results/test_evalita-mp_evaluation.json +51 -0
  1276. wisent/examples/scripts/results/test_evalita-mp_pairs.json +14 -0
  1277. wisent/examples/scripts/results/test_evalita-sp_sum_task_fp-small_p1_evaluation.json +30 -0
  1278. wisent/examples/scripts/results/test_evalita-sp_sum_task_fp-small_p1_pairs.json +8 -0
  1279. wisent/examples/scripts/results/test_evalita_LLM_evaluation.json +51 -0
  1280. wisent/examples/scripts/results/test_evalita_LLM_pairs.json +14 -0
  1281. wisent/examples/scripts/results/test_evalita_llm/test_evalita_llm_evaluation.json +51 -0
  1282. wisent/examples/scripts/results/test_evalita_llm/test_evalita_llm_pairs.json +14 -0
  1283. wisent/examples/scripts/results/test_evalita_mp/test_evalita-mp_te_prompt-1_evaluation.json +30 -0
  1284. wisent/examples/scripts/results/test_evalita_mp/test_evalita-mp_te_prompt-1_pairs.json +8 -0
  1285. wisent/examples/scripts/results/test_evalita_mp2/test_evalita_mp_evaluation.json +51 -0
  1286. wisent/examples/scripts/results/test_evalita_mp2/test_evalita_mp_pairs.json +14 -0
  1287. wisent/examples/scripts/results/test_evalita_sp2/test_evalita-sp_sum_task_fp-small_p1_evaluation.json +30 -0
  1288. wisent/examples/scripts/results/test_evalita_sp2/test_evalita-sp_sum_task_fp-small_p1_pairs.json +8 -0
  1289. wisent/examples/scripts/results/test_fda_evaluation.json +30 -0
  1290. wisent/examples/scripts/results/test_fda_pairs.json +8 -0
  1291. wisent/examples/scripts/results/test_financial_tweets_evaluation.json +30 -0
  1292. wisent/examples/scripts/results/test_financial_tweets_pairs.json +8 -0
  1293. wisent/examples/scripts/results/test_fld/test_fld_evaluation.json +30 -0
  1294. wisent/examples/scripts/results/test_fld/test_fld_pairs.json +8 -0
  1295. wisent/examples/scripts/results/test_fld_evaluation.json +30 -0
  1296. wisent/examples/scripts/results/test_fld_fixed/test_fld_evaluation.json +30 -0
  1297. wisent/examples/scripts/results/test_fld_fixed/test_fld_pairs.json +8 -0
  1298. wisent/examples/scripts/results/test_fld_pairs.json +8 -0
  1299. wisent/examples/scripts/results/test_flores_evaluation.json +51 -0
  1300. wisent/examples/scripts/results/test_flores_pairs.json +14 -0
  1301. wisent/examples/scripts/results/test_freebase_evaluation.json +30 -0
  1302. wisent/examples/scripts/results/test_freebase_pairs.json +8 -0
  1303. wisent/examples/scripts/results/test_french_bench_evaluation.json +51 -0
  1304. wisent/examples/scripts/results/test_french_bench_pairs.json +14 -0
  1305. wisent/examples/scripts/results/test_galcola_evaluation.json +30 -0
  1306. wisent/examples/scripts/results/test_galcola_pairs.json +8 -0
  1307. wisent/examples/scripts/results/test_galician_bench_evaluation.json +51 -0
  1308. wisent/examples/scripts/results/test_galician_bench_pairs.json +14 -0
  1309. wisent/examples/scripts/results/test_glianorex_evaluation.json +30 -0
  1310. wisent/examples/scripts/results/test_glianorex_pairs.json +8 -0
  1311. wisent/examples/scripts/results/test_global_mmlu_evaluation.json +51 -0
  1312. wisent/examples/scripts/results/test_global_mmlu_pairs.json +14 -0
  1313. wisent/examples/scripts/results/test_glue_evaluation.json +51 -0
  1314. wisent/examples/scripts/results/test_glue_pairs.json +14 -0
  1315. wisent/examples/scripts/results/test_gpqa_evaluation.json +51 -0
  1316. wisent/examples/scripts/results/test_gpqa_pairs.json +14 -0
  1317. wisent/examples/scripts/results/test_gpt3_translation_benchmarks_evaluation.json +51 -0
  1318. wisent/examples/scripts/results/test_gpt3_translation_benchmarks_pairs.json +14 -0
  1319. wisent/examples/scripts/results/test_groundcocoa_evaluation.json +30 -0
  1320. wisent/examples/scripts/results/test_groundcocoa_pairs.json +8 -0
  1321. wisent/examples/scripts/results/test_gsm8k_evaluation.json +30 -0
  1322. wisent/examples/scripts/results/test_gsm8k_pairs.json +8 -0
  1323. wisent/examples/scripts/results/test_haerae_evaluation.json +51 -0
  1324. wisent/examples/scripts/results/test_haerae_pairs.json +14 -0
  1325. wisent/examples/scripts/results/test_headqa_evaluation.json +30 -0
  1326. wisent/examples/scripts/results/test_headqa_pairs.json +8 -0
  1327. wisent/examples/scripts/results/test_hellaswag_evaluation.json +30 -0
  1328. wisent/examples/scripts/results/test_hellaswag_pairs.json +8 -0
  1329. wisent/examples/scripts/results/test_hendrycks_ethics_evaluation.json +51 -0
  1330. wisent/examples/scripts/results/test_hendrycks_ethics_pairs.json +14 -0
  1331. wisent/examples/scripts/results/test_hendrycks_math_evaluation.json +51 -0
  1332. wisent/examples/scripts/results/test_hendrycks_math_pairs.json +14 -0
  1333. wisent/examples/scripts/results/test_histoires_morales_evaluation.json +30 -0
  1334. wisent/examples/scripts/results/test_histoires_morales_pairs.json +8 -0
  1335. wisent/examples/scripts/results/test_hmmt_evaluation.json +30 -0
  1336. wisent/examples/scripts/results/test_hmmt_feb_2025_evaluation.json +30 -0
  1337. wisent/examples/scripts/results/test_hmmt_feb_2025_pairs.json +8 -0
  1338. wisent/examples/scripts/results/test_hmmt_pairs.json +8 -0
  1339. wisent/examples/scripts/results/test_hrm8k_evaluation.json +51 -0
  1340. wisent/examples/scripts/results/test_hrm8k_pairs.json +14 -0
  1341. wisent/examples/scripts/results/test_humaneval_evaluation.json +30 -0
  1342. wisent/examples/scripts/results/test_humaneval_pairs.json +8 -0
  1343. wisent/examples/scripts/results/test_humaneval_plus_evaluation.json +30 -0
  1344. wisent/examples/scripts/results/test_humaneval_plus_pairs.json +8 -0
  1345. wisent/examples/scripts/results/test_ifeval_evaluation.json +30 -0
  1346. wisent/examples/scripts/results/test_ifeval_pairs.json +8 -0
  1347. wisent/examples/scripts/results/test_instruct_humaneval/test_instruct_humaneval_evaluation.json +30 -0
  1348. wisent/examples/scripts/results/test_instruct_humaneval/test_instruct_humaneval_pairs.json +8 -0
  1349. wisent/examples/scripts/results/test_instruct_humaneval_evaluation.json +30 -0
  1350. wisent/examples/scripts/results/test_instruct_humaneval_pairs.json +8 -0
  1351. wisent/examples/scripts/results/test_inverse_scaling_evaluation.json +51 -0
  1352. wisent/examples/scripts/results/test_inverse_scaling_hindsight_neglect_10shot_evaluation.json +30 -0
  1353. wisent/examples/scripts/results/test_inverse_scaling_hindsight_neglect_10shot_pairs.json +8 -0
  1354. wisent/examples/scripts/results/test_inverse_scaling_mc/test_inverse_scaling_mc_evaluation.json +51 -0
  1355. wisent/examples/scripts/results/test_inverse_scaling_mc/test_inverse_scaling_mc_pairs.json +14 -0
  1356. wisent/examples/scripts/results/test_inverse_scaling_pairs.json +14 -0
  1357. wisent/examples/scripts/results/test_iwslt2017-ar-en_evaluation.json +30 -0
  1358. wisent/examples/scripts/results/test_iwslt2017-ar-en_pairs.json +8 -0
  1359. wisent/examples/scripts/results/test_iwslt2017-en-ar_evaluation.json +30 -0
  1360. wisent/examples/scripts/results/test_iwslt2017-en-ar_pairs.json +8 -0
  1361. wisent/examples/scripts/results/test_iwslt2017_ar_en/test_iwslt2017-ar-en_evaluation.json +30 -0
  1362. wisent/examples/scripts/results/test_iwslt2017_ar_en/test_iwslt2017-ar-en_pairs.json +8 -0
  1363. wisent/examples/scripts/results/test_iwslt2017_en_ar/test_iwslt2017-en-ar_evaluation.json +30 -0
  1364. wisent/examples/scripts/results/test_iwslt2017_en_ar/test_iwslt2017-en-ar_pairs.json +8 -0
  1365. wisent/examples/scripts/results/test_iwslt2017_group/test_iwslt2017_evaluation.json +30 -0
  1366. wisent/examples/scripts/results/test_iwslt2017_group/test_iwslt2017_pairs.json +8 -0
  1367. wisent/examples/scripts/results/test_japanese_leaderboard_evaluation.json +51 -0
  1368. wisent/examples/scripts/results/test_japanese_leaderboard_pairs.json +14 -0
  1369. wisent/examples/scripts/results/test_jsonschema_bench/test_jsonschema_bench_evaluation.json +30 -0
  1370. wisent/examples/scripts/results/test_jsonschema_bench/test_jsonschema_bench_pairs.json +8 -0
  1371. wisent/examples/scripts/results/test_jsonschema_bench_evaluation.json +30 -0
  1372. wisent/examples/scripts/results/test_jsonschema_bench_final/test_jsonschema_bench_evaluation.json +30 -0
  1373. wisent/examples/scripts/results/test_jsonschema_bench_final/test_jsonschema_bench_pairs.json +8 -0
  1374. wisent/examples/scripts/results/test_jsonschema_bench_pairs.json +8 -0
  1375. wisent/examples/scripts/results/test_kbl_evaluation.json +51 -0
  1376. wisent/examples/scripts/results/test_kbl_fixed/test_kbl_evaluation.json +51 -0
  1377. wisent/examples/scripts/results/test_kbl_fixed/test_kbl_pairs.json +14 -0
  1378. wisent/examples/scripts/results/test_kbl_pairs.json +14 -0
  1379. wisent/examples/scripts/results/test_kmmlu_evaluation.json +51 -0
  1380. wisent/examples/scripts/results/test_kmmlu_pairs.json +14 -0
  1381. wisent/examples/scripts/results/test_kobest_evaluation.json +51 -0
  1382. wisent/examples/scripts/results/test_kobest_pairs.json +14 -0
  1383. wisent/examples/scripts/results/test_kormedmcqa/test_kormedmcqa_evaluation.json +30 -0
  1384. wisent/examples/scripts/results/test_kormedmcqa/test_kormedmcqa_pairs.json +8 -0
  1385. wisent/examples/scripts/results/test_kormedmcqa_dentist/test_kormedmcqa_dentist_evaluation.json +30 -0
  1386. wisent/examples/scripts/results/test_kormedmcqa_dentist/test_kormedmcqa_dentist_pairs.json +8 -0
  1387. wisent/examples/scripts/results/test_kormedmcqa_evaluation.json +30 -0
  1388. wisent/examples/scripts/results/test_kormedmcqa_pairs.json +8 -0
  1389. wisent/examples/scripts/results/test_lambada_cloze_evaluation.json +30 -0
  1390. wisent/examples/scripts/results/test_lambada_cloze_pairs.json +8 -0
  1391. wisent/examples/scripts/results/test_lambada_evaluation.json +30 -0
  1392. wisent/examples/scripts/results/test_lambada_final/test_lambada_openai_mt_stablelm_en_evaluation.json +30 -0
  1393. wisent/examples/scripts/results/test_lambada_final/test_lambada_openai_mt_stablelm_en_pairs.json +8 -0
  1394. wisent/examples/scripts/results/test_lambada_multilingual/test_lambada_multilingual_evaluation.json +51 -0
  1395. wisent/examples/scripts/results/test_lambada_multilingual/test_lambada_multilingual_pairs.json +14 -0
  1396. wisent/examples/scripts/results/test_lambada_multilingual_evaluation.json +51 -0
  1397. wisent/examples/scripts/results/test_lambada_multilingual_pairs.json +14 -0
  1398. wisent/examples/scripts/results/test_lambada_multilingual_stablelm_evaluation.json +51 -0
  1399. wisent/examples/scripts/results/test_lambada_multilingual_stablelm_pairs.json +14 -0
  1400. wisent/examples/scripts/results/test_lambada_openai_evaluation.json +30 -0
  1401. wisent/examples/scripts/results/test_lambada_openai_pairs.json +8 -0
  1402. wisent/examples/scripts/results/test_lambada_pairs.json +8 -0
  1403. wisent/examples/scripts/results/test_lambada_stablelm_en_fixed/test_lambada_openai_mt_stablelm_en_evaluation.json +30 -0
  1404. wisent/examples/scripts/results/test_lambada_stablelm_en_fixed/test_lambada_openai_mt_stablelm_en_pairs.json +8 -0
  1405. wisent/examples/scripts/results/test_lambada_stablelm_fixed/test_lambada_openai_mt_stablelm_en_evaluation.json +30 -0
  1406. wisent/examples/scripts/results/test_lambada_stablelm_fixed/test_lambada_openai_mt_stablelm_en_pairs.json +8 -0
  1407. wisent/examples/scripts/results/test_lambada_standard_evaluation.json +30 -0
  1408. wisent/examples/scripts/results/test_lambada_standard_pairs.json +8 -0
  1409. wisent/examples/scripts/results/test_leaderboard_evaluation.json +51 -0
  1410. wisent/examples/scripts/results/test_leaderboard_pairs.json +14 -0
  1411. wisent/examples/scripts/results/test_libra/test_libra_evaluation.json +51 -0
  1412. wisent/examples/scripts/results/test_libra/test_libra_pairs.json +14 -0
  1413. wisent/examples/scripts/results/test_libra_evaluation.json +51 -0
  1414. wisent/examples/scripts/results/test_libra_pairs.json +14 -0
  1415. wisent/examples/scripts/results/test_lingoly_evaluation.json +30 -0
  1416. wisent/examples/scripts/results/test_lingoly_pairs.json +8 -0
  1417. wisent/examples/scripts/results/test_livecodebench_evaluation.json +30 -0
  1418. wisent/examples/scripts/results/test_livecodebench_pairs.json +8 -0
  1419. wisent/examples/scripts/results/test_livemathbench_cnmo_en_evaluation.json +30 -0
  1420. wisent/examples/scripts/results/test_livemathbench_cnmo_en_pairs.json +8 -0
  1421. wisent/examples/scripts/results/test_livemathbench_cnmo_zh_evaluation.json +30 -0
  1422. wisent/examples/scripts/results/test_livemathbench_cnmo_zh_pairs.json +8 -0
  1423. wisent/examples/scripts/results/test_llama_evaluation.json +30 -0
  1424. wisent/examples/scripts/results/test_llama_pairs.json +8 -0
  1425. wisent/examples/scripts/results/test_logiqa2_evaluation.json +30 -0
  1426. wisent/examples/scripts/results/test_logiqa2_pairs.json +8 -0
  1427. wisent/examples/scripts/results/test_logiqa_evaluation.json +30 -0
  1428. wisent/examples/scripts/results/test_logiqa_pairs.json +8 -0
  1429. wisent/examples/scripts/results/test_m_mmlu_evaluation.json +51 -0
  1430. wisent/examples/scripts/results/test_m_mmlu_pairs.json +14 -0
  1431. wisent/examples/scripts/results/test_mastermind/test_mastermind_evaluation.json +51 -0
  1432. wisent/examples/scripts/results/test_mastermind/test_mastermind_pairs.json +14 -0
  1433. wisent/examples/scripts/results/test_mastermind_24_easy/test_mastermind_24_easy_evaluation.json +30 -0
  1434. wisent/examples/scripts/results/test_mastermind_24_easy/test_mastermind_24_easy_pairs.json +8 -0
  1435. wisent/examples/scripts/results/test_mastermind_evaluation.json +51 -0
  1436. wisent/examples/scripts/results/test_mastermind_pairs.json +14 -0
  1437. wisent/examples/scripts/results/test_math500_evaluation.json +30 -0
  1438. wisent/examples/scripts/results/test_math500_pairs.json +8 -0
  1439. wisent/examples/scripts/results/test_math_evaluation.json +30 -0
  1440. wisent/examples/scripts/results/test_math_pairs.json +8 -0
  1441. wisent/examples/scripts/results/test_mathqa_evaluation.json +30 -0
  1442. wisent/examples/scripts/results/test_mathqa_pairs.json +8 -0
  1443. wisent/examples/scripts/results/test_mbpp_evaluation.json +30 -0
  1444. wisent/examples/scripts/results/test_mbpp_pairs.json +8 -0
  1445. wisent/examples/scripts/results/test_mbpp_plus_evaluation.json +30 -0
  1446. wisent/examples/scripts/results/test_mbpp_plus_pairs.json +8 -0
  1447. wisent/examples/scripts/results/test_mc_taco_evaluation.json +30 -0
  1448. wisent/examples/scripts/results/test_mc_taco_pairs.json +8 -0
  1449. wisent/examples/scripts/results/test_med_concepts_qa/test_med_concepts_qa_evaluation.json +51 -0
  1450. wisent/examples/scripts/results/test_med_concepts_qa/test_med_concepts_qa_pairs.json +14 -0
  1451. wisent/examples/scripts/results/test_med_concepts_qa_atc_easy/test_med_concepts_qa_atc_easy_evaluation.json +30 -0
  1452. wisent/examples/scripts/results/test_med_concepts_qa_atc_easy/test_med_concepts_qa_atc_easy_pairs.json +8 -0
  1453. wisent/examples/scripts/results/test_med_concepts_qa_evaluation.json +51 -0
  1454. wisent/examples/scripts/results/test_med_concepts_qa_pairs.json +14 -0
  1455. wisent/examples/scripts/results/test_meddialog_evaluation.json +30 -0
  1456. wisent/examples/scripts/results/test_meddialog_pairs.json +8 -0
  1457. wisent/examples/scripts/results/test_meddialog_raw_perplexity/test_meddialog_raw_perplexity_evaluation.json +30 -0
  1458. wisent/examples/scripts/results/test_meddialog_raw_perplexity/test_meddialog_raw_perplexity_pairs.json +8 -0
  1459. wisent/examples/scripts/results/test_mediqa_qa2019_evaluation.json +30 -0
  1460. wisent/examples/scripts/results/test_mediqa_qa2019_pairs.json +8 -0
  1461. wisent/examples/scripts/results/test_medmcqa_evaluation.json +30 -0
  1462. wisent/examples/scripts/results/test_medmcqa_pairs.json +8 -0
  1463. wisent/examples/scripts/results/test_medqa_evaluation.json +30 -0
  1464. wisent/examples/scripts/results/test_medqa_pairs.json +8 -0
  1465. wisent/examples/scripts/results/test_medtext_evaluation.json +30 -0
  1466. wisent/examples/scripts/results/test_medtext_pairs.json +8 -0
  1467. wisent/examples/scripts/results/test_mela_evaluation.json +51 -0
  1468. wisent/examples/scripts/results/test_mela_pairs.json +14 -0
  1469. wisent/examples/scripts/results/test_meqsum_evaluation.json +30 -0
  1470. wisent/examples/scripts/results/test_meqsum_pairs.json +8 -0
  1471. wisent/examples/scripts/results/test_mercury_evaluation.json +30 -0
  1472. wisent/examples/scripts/results/test_mercury_pairs.json +8 -0
  1473. wisent/examples/scripts/results/test_metabench_evaluation.json +51 -0
  1474. wisent/examples/scripts/results/test_metabench_pairs.json +14 -0
  1475. wisent/examples/scripts/results/test_mgsm_evaluation.json +51 -0
  1476. wisent/examples/scripts/results/test_mgsm_pairs.json +14 -0
  1477. wisent/examples/scripts/results/test_mimic_repsum_evaluation.json +30 -0
  1478. wisent/examples/scripts/results/test_mimic_repsum_pairs.json +8 -0
  1479. wisent/examples/scripts/results/test_minerva_math_evaluation.json +51 -0
  1480. wisent/examples/scripts/results/test_minerva_math_pairs.json +14 -0
  1481. wisent/examples/scripts/results/test_mlqa_evaluation.json +51 -0
  1482. wisent/examples/scripts/results/test_mlqa_pairs.json +14 -0
  1483. wisent/examples/scripts/results/test_mmlu-pro-plus_evaluation.json +51 -0
  1484. wisent/examples/scripts/results/test_mmlu-pro-plus_pairs.json +14 -0
  1485. wisent/examples/scripts/results/test_mmlu_evaluation.json +51 -0
  1486. wisent/examples/scripts/results/test_mmlu_pairs.json +14 -0
  1487. wisent/examples/scripts/results/test_mmlu_pro_evaluation.json +51 -0
  1488. wisent/examples/scripts/results/test_mmlu_pro_pairs.json +14 -0
  1489. wisent/examples/scripts/results/test_mmlu_prox_evaluation.json +51 -0
  1490. wisent/examples/scripts/results/test_mmlu_prox_pairs.json +14 -0
  1491. wisent/examples/scripts/results/test_mmlusr_evaluation.json +30 -0
  1492. wisent/examples/scripts/results/test_mmlusr_pairs.json +8 -0
  1493. wisent/examples/scripts/results/test_mmmu_evaluation.json +51 -0
  1494. wisent/examples/scripts/results/test_mmmu_pairs.json +14 -0
  1495. wisent/examples/scripts/results/test_mnli_evaluation.json +30 -0
  1496. wisent/examples/scripts/results/test_mnli_pairs.json +8 -0
  1497. wisent/examples/scripts/results/test_model_written_evals_evaluation.json +51 -0
  1498. wisent/examples/scripts/results/test_model_written_evals_pairs.json +14 -0
  1499. wisent/examples/scripts/results/test_moral_stories_evaluation.json +30 -0
  1500. wisent/examples/scripts/results/test_moral_stories_pairs.json +8 -0
  1501. wisent/examples/scripts/results/test_mts_dialog_evaluation.json +30 -0
  1502. wisent/examples/scripts/results/test_mts_dialog_pairs.json +8 -0
  1503. wisent/examples/scripts/results/test_multiblimp_evaluation.json +51 -0
  1504. wisent/examples/scripts/results/test_multiblimp_pairs.json +14 -0
  1505. wisent/examples/scripts/results/test_multimedqa_evaluation.json +51 -0
  1506. wisent/examples/scripts/results/test_multimedqa_pairs.json +14 -0
  1507. wisent/examples/scripts/results/test_multipl_e_evaluation.json +30 -0
  1508. wisent/examples/scripts/results/test_multipl_e_pairs.json +8 -0
  1509. wisent/examples/scripts/results/test_mutual_evaluation.json +30 -0
  1510. wisent/examples/scripts/results/test_mutual_pairs.json +8 -0
  1511. wisent/examples/scripts/results/test_non_greedy_robustness_agieval_aqua_rat_evaluation.json +30 -0
  1512. wisent/examples/scripts/results/test_non_greedy_robustness_agieval_aqua_rat_pairs.json +8 -0
  1513. wisent/examples/scripts/results/test_noreval_evaluation.json +51 -0
  1514. wisent/examples/scripts/results/test_noreval_pairs.json +14 -0
  1515. wisent/examples/scripts/results/test_noticia_evaluation.json +30 -0
  1516. wisent/examples/scripts/results/test_noticia_pairs.json +8 -0
  1517. wisent/examples/scripts/results/test_nq_open_evaluation.json +30 -0
  1518. wisent/examples/scripts/results/test_nq_open_pairs.json +8 -0
  1519. wisent/examples/scripts/results/test_olaph_evaluation.json +30 -0
  1520. wisent/examples/scripts/results/test_olaph_pairs.json +8 -0
  1521. wisent/examples/scripts/results/test_openbookqa_evaluation.json +30 -0
  1522. wisent/examples/scripts/results/test_openbookqa_pairs.json +8 -0
  1523. wisent/examples/scripts/results/test_openllm_evaluation.json +51 -0
  1524. wisent/examples/scripts/results/test_openllm_pairs.json +14 -0
  1525. wisent/examples/scripts/results/test_option_order_robustness_agieval_aqua_rat_evaluation.json +30 -0
  1526. wisent/examples/scripts/results/test_option_order_robustness_agieval_aqua_rat_pairs.json +8 -0
  1527. wisent/examples/scripts/results/test_paloma_evaluation.json +51 -0
  1528. wisent/examples/scripts/results/test_paloma_pairs.json +14 -0
  1529. wisent/examples/scripts/results/test_passkey/test_passkey_evaluation.json +30 -0
  1530. wisent/examples/scripts/results/test_passkey/test_passkey_pairs.json +8 -0
  1531. wisent/examples/scripts/results/test_paws-x_evaluation.json +51 -0
  1532. wisent/examples/scripts/results/test_paws-x_pairs.json +14 -0
  1533. wisent/examples/scripts/results/test_paws_en/test_paws_en_evaluation.json +30 -0
  1534. wisent/examples/scripts/results/test_paws_en/test_paws_en_pairs.json +8 -0
  1535. wisent/examples/scripts/results/test_penn_treebank_evaluation.json +30 -0
  1536. wisent/examples/scripts/results/test_penn_treebank_pairs.json +8 -0
  1537. wisent/examples/scripts/results/test_pile_10k/test_pile_10k_evaluation.json +30 -0
  1538. wisent/examples/scripts/results/test_pile_10k/test_pile_10k_pairs.json +8 -0
  1539. wisent/examples/scripts/results/test_piqa_evaluation.json +30 -0
  1540. wisent/examples/scripts/results/test_piqa_pairs.json +8 -0
  1541. wisent/examples/scripts/results/test_polemo2_evaluation.json +30 -0
  1542. wisent/examples/scripts/results/test_polemo2_pairs.json +8 -0
  1543. wisent/examples/scripts/results/test_polymath_en_high_evaluation.json +30 -0
  1544. wisent/examples/scripts/results/test_polymath_en_high_pairs.json +8 -0
  1545. wisent/examples/scripts/results/test_polymath_en_medium_evaluation.json +30 -0
  1546. wisent/examples/scripts/results/test_polymath_en_medium_pairs.json +8 -0
  1547. wisent/examples/scripts/results/test_polymath_zh_high_evaluation.json +30 -0
  1548. wisent/examples/scripts/results/test_polymath_zh_high_pairs.json +8 -0
  1549. wisent/examples/scripts/results/test_polymath_zh_medium_evaluation.json +30 -0
  1550. wisent/examples/scripts/results/test_polymath_zh_medium_pairs.json +8 -0
  1551. wisent/examples/scripts/results/test_portuguese_bench_evaluation.json +51 -0
  1552. wisent/examples/scripts/results/test_portuguese_bench_pairs.json +14 -0
  1553. wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat/test_prompt_robustness_agieval_aqua_rat_evaluation.json +30 -0
  1554. wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat/test_prompt_robustness_agieval_aqua_rat_pairs.json +8 -0
  1555. wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat_evaluation.json +30 -0
  1556. wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat_pairs.json +8 -0
  1557. wisent/examples/scripts/results/test_prost_evaluation.json +30 -0
  1558. wisent/examples/scripts/results/test_prost_pairs.json +8 -0
  1559. wisent/examples/scripts/results/test_ptb_evaluation.json +30 -0
  1560. wisent/examples/scripts/results/test_ptb_pairs.json +8 -0
  1561. wisent/examples/scripts/results/test_pubmedqa_evaluation.json +30 -0
  1562. wisent/examples/scripts/results/test_pubmedqa_pairs.json +8 -0
  1563. wisent/examples/scripts/results/test_pythia_evaluation.json +51 -0
  1564. wisent/examples/scripts/results/test_pythia_pairs.json +14 -0
  1565. wisent/examples/scripts/results/test_qa4mre_evaluation.json +30 -0
  1566. wisent/examples/scripts/results/test_qa4mre_pairs.json +8 -0
  1567. wisent/examples/scripts/results/test_qasper_evaluation.json +30 -0
  1568. wisent/examples/scripts/results/test_qasper_pairs.json +8 -0
  1569. wisent/examples/scripts/results/test_race_evaluation.json +30 -0
  1570. wisent/examples/scripts/results/test_race_pairs.json +8 -0
  1571. wisent/examples/scripts/results/test_realtoxicityprompts_evaluation.json +30 -0
  1572. wisent/examples/scripts/results/test_realtoxicityprompts_pairs.json +8 -0
  1573. wisent/examples/scripts/results/test_recode_evaluation.json +30 -0
  1574. wisent/examples/scripts/results/test_recode_pairs.json +8 -0
  1575. wisent/examples/scripts/results/test_record_evaluation.json +30 -0
  1576. wisent/examples/scripts/results/test_record_pairs.json +8 -0
  1577. wisent/examples/scripts/results/test_ruler_evaluation.json +51 -0
  1578. wisent/examples/scripts/results/test_ruler_pairs.json +14 -0
  1579. wisent/examples/scripts/results/test_sciq_evaluation.json +30 -0
  1580. wisent/examples/scripts/results/test_sciq_pairs.json +8 -0
  1581. wisent/examples/scripts/results/test_score_evaluation.json +51 -0
  1582. wisent/examples/scripts/results/test_score_pairs.json +14 -0
  1583. wisent/examples/scripts/results/test_self_consistency_evaluation.json +30 -0
  1584. wisent/examples/scripts/results/test_self_consistency_pairs.json +8 -0
  1585. wisent/examples/scripts/results/test_siqa/test_siqa_evaluation.json +30 -0
  1586. wisent/examples/scripts/results/test_siqa/test_siqa_pairs.json +8 -0
  1587. wisent/examples/scripts/results/test_siqa_evaluation.json +30 -0
  1588. wisent/examples/scripts/results/test_siqa_pairs.json +8 -0
  1589. wisent/examples/scripts/results/test_spanish_bench_evaluation.json +51 -0
  1590. wisent/examples/scripts/results/test_spanish_bench_pairs.json +14 -0
  1591. wisent/examples/scripts/results/test_squad2_evaluation.json +30 -0
  1592. wisent/examples/scripts/results/test_squad2_pairs.json +8 -0
  1593. wisent/examples/scripts/results/test_squadv2_evaluation.json +30 -0
  1594. wisent/examples/scripts/results/test_squadv2_pairs.json +8 -0
  1595. wisent/examples/scripts/results/test_super-glue-lm-eval-v1-seq2seq_evaluation.json +30 -0
  1596. wisent/examples/scripts/results/test_super-glue-lm-eval-v1-seq2seq_pairs.json +8 -0
  1597. wisent/examples/scripts/results/test_super-glue-lm-eval-v1_evaluation.json +51 -0
  1598. wisent/examples/scripts/results/test_super-glue-lm-eval-v1_pairs.json +14 -0
  1599. wisent/examples/scripts/results/test_swag_evaluation.json +30 -0
  1600. wisent/examples/scripts/results/test_swag_pairs.json +8 -0
  1601. wisent/examples/scripts/results/test_tinyBenchmarks_evaluation.json +51 -0
  1602. wisent/examples/scripts/results/test_tinyBenchmarks_pairs.json +14 -0
  1603. wisent/examples/scripts/results/test_tmmluplus_evaluation.json +51 -0
  1604. wisent/examples/scripts/results/test_tmmluplus_pairs.json +14 -0
  1605. wisent/examples/scripts/results/test_translation_evaluation.json +51 -0
  1606. wisent/examples/scripts/results/test_translation_pairs.json +14 -0
  1607. wisent/examples/scripts/results/test_triviaqa_evaluation.json +30 -0
  1608. wisent/examples/scripts/results/test_triviaqa_pairs.json +8 -0
  1609. wisent/examples/scripts/results/test_truthfulqa-multi_evaluation.json +51 -0
  1610. wisent/examples/scripts/results/test_truthfulqa-multi_pairs.json +14 -0
  1611. wisent/examples/scripts/results/test_truthfulqa_evaluation.json +30 -0
  1612. wisent/examples/scripts/results/test_truthfulqa_mc1_evaluation.json +30 -0
  1613. wisent/examples/scripts/results/test_truthfulqa_mc1_pairs.json +8 -0
  1614. wisent/examples/scripts/results/test_truthfulqa_mc2_evaluation.json +30 -0
  1615. wisent/examples/scripts/results/test_truthfulqa_mc2_pairs.json +8 -0
  1616. wisent/examples/scripts/results/test_truthfulqa_pairs.json +8 -0
  1617. wisent/examples/scripts/results/test_turkishmmlu_evaluation.json +51 -0
  1618. wisent/examples/scripts/results/test_turkishmmlu_pairs.json +14 -0
  1619. wisent/examples/scripts/results/test_unfair_tos_evaluation.json +30 -0
  1620. wisent/examples/scripts/results/test_unfair_tos_pairs.json +8 -0
  1621. wisent/examples/scripts/results/test_unscramble_evaluation.json +51 -0
  1622. wisent/examples/scripts/results/test_unscramble_pairs.json +14 -0
  1623. wisent/examples/scripts/results/test_webqs_evaluation.json +30 -0
  1624. wisent/examples/scripts/results/test_webqs_pairs.json +8 -0
  1625. wisent/examples/scripts/results/test_wikitext103_evaluation.json +30 -0
  1626. wisent/examples/scripts/results/test_wikitext103_pairs.json +8 -0
  1627. wisent/examples/scripts/results/test_wikitext_evaluation.json +30 -0
  1628. wisent/examples/scripts/results/test_wikitext_pairs.json +8 -0
  1629. wisent/examples/scripts/results/test_winogender_evaluation.json +51 -0
  1630. wisent/examples/scripts/results/test_winogender_pairs.json +14 -0
  1631. wisent/examples/scripts/results/test_winogrande_evaluation.json +30 -0
  1632. wisent/examples/scripts/results/test_winogrande_pairs.json +8 -0
  1633. wisent/examples/scripts/results/test_wmdp_evaluation.json +30 -0
  1634. wisent/examples/scripts/results/test_wmdp_pairs.json +8 -0
  1635. wisent/examples/scripts/results/test_wmt-ro-en-t5-prompt_evaluation.json +30 -0
  1636. wisent/examples/scripts/results/test_wmt-ro-en-t5-prompt_pairs.json +8 -0
  1637. wisent/examples/scripts/results/test_wmt14_en_fr_evaluation.json +30 -0
  1638. wisent/examples/scripts/results/test_wmt14_en_fr_pairs.json +8 -0
  1639. wisent/examples/scripts/results/test_wmt16_en_de_evaluation.json +30 -0
  1640. wisent/examples/scripts/results/test_wmt16_en_de_pairs.json +8 -0
  1641. wisent/examples/scripts/results/test_wmt16_ro_en_evaluation.json +30 -0
  1642. wisent/examples/scripts/results/test_wmt16_ro_en_pairs.json +8 -0
  1643. wisent/examples/scripts/results/test_wsc273_evaluation.json +30 -0
  1644. wisent/examples/scripts/results/test_wsc273_pairs.json +8 -0
  1645. wisent/examples/scripts/results/test_xcopa_evaluation.json +51 -0
  1646. wisent/examples/scripts/results/test_xcopa_pairs.json +14 -0
  1647. wisent/examples/scripts/results/test_xnli_eu_evaluation.json +30 -0
  1648. wisent/examples/scripts/results/test_xnli_eu_pairs.json +8 -0
  1649. wisent/examples/scripts/results/test_xnli_evaluation.json +51 -0
  1650. wisent/examples/scripts/results/test_xnli_pairs.json +14 -0
  1651. wisent/examples/scripts/results/test_xquad_evaluation.json +51 -0
  1652. wisent/examples/scripts/results/test_xquad_pairs.json +14 -0
  1653. wisent/examples/scripts/results/test_xstorycloze_evaluation.json +51 -0
  1654. wisent/examples/scripts/results/test_xstorycloze_pairs.json +14 -0
  1655. wisent/examples/scripts/results/test_xsum_evaluation.json +30 -0
  1656. wisent/examples/scripts/results/test_xsum_pairs.json +8 -0
  1657. wisent/examples/scripts/results/test_xwinograd_evaluation.json +51 -0
  1658. wisent/examples/scripts/results/test_xwinograd_pairs.json +14 -0
  1659. wisent/examples/scripts/results/test_yahoo_answers_topics_evaluation.json +30 -0
  1660. wisent/examples/scripts/results/test_yahoo_answers_topics_pairs.json +8 -0
  1661. wisent/parameters/__init__.py +1 -0
  1662. wisent/parameters/lm_eval/all_lm_eval_task_families.json +169 -0
  1663. wisent/parameters/lm_eval/broken_in_lm_eval.json +10 -0
  1664. wisent/parameters/lm_eval/evaluations_not_lm_eval_tasks.json +0 -0
  1665. wisent/parameters/lm_eval/evaluator_check.json +3476 -0
  1666. wisent/parameters/lm_eval/final_verification.json +24782 -0
  1667. wisent/parameters/lm_eval/group_task_evaluators.json +1833 -0
  1668. wisent/parameters/lm_eval/group_tasks.json +150 -0
  1669. wisent/parameters/lm_eval/individual_tasks.json +402 -0
  1670. wisent/parameters/lm_eval/no_readmes.json +1 -0
  1671. wisent/parameters/lm_eval/not_lm_eval_tasks.json +110 -0
  1672. wisent/parameters/lm_eval/read_tasks.json +208 -0
  1673. wisent/parameters/lm_eval/readme_files.json +208 -0
  1674. wisent/parameters/lm_eval/track_progress_not_lm_eval_tasks.json +128 -0
  1675. wisent/parameters/tasks/missing_task_families.json +2963 -0
  1676. wisent/parameters/tasks/remaining_tasks_to_implement.json +199 -0
  1677. wisent/parameters/tasks/risks.json +10 -0
  1678. wisent/parameters/tasks/skills.json +14 -0
  1679. wisent/parameters/tasks/tasks.json +56031 -0
  1680. wisent/scripts/run_quality_metrics_sweep.sh +315 -0
  1681. wisent/tests/__init__.py +0 -0
  1682. wisent/tests/examples/__init__.py +0 -0
  1683. wisent/tests/examples/cli/__init__.py +0 -0
  1684. wisent/tests/examples/cli/activations/__init__.py +0 -0
  1685. wisent/tests/examples/cli/activations/test_get_activations.py +127 -0
  1686. wisent/tests/examples/cli/classifier/__init__.py +0 -0
  1687. wisent/tests/examples/cli/classifier/test_classifier_examples.py +141 -0
  1688. wisent/tests/examples/cli/contrastive_pairs/__init__.py +0 -0
  1689. wisent/tests/examples/cli/contrastive_pairs/test_generate_pairs.py +89 -0
  1690. wisent/tests/examples/cli/evaluation/__init__.py +0 -0
  1691. wisent/tests/examples/cli/evaluation/test_evaluation_examples.py +117 -0
  1692. wisent/tests/examples/cli/generate/__init__.py +0 -0
  1693. wisent/tests/examples/cli/generate/test_generate_with_classifier.py +146 -0
  1694. wisent/tests/examples/cli/generate/test_generate_with_steering.py +149 -0
  1695. wisent/tests/examples/cli/generate/test_only_generate.py +110 -0
  1696. wisent/tests/examples/cli/multi_steering/__init__.py +0 -0
  1697. wisent/tests/examples/cli/multi_steering/test_multi_steer_from_trained_vectors.py +210 -0
  1698. wisent/tests/examples/cli/multi_steering/test_multi_steer_with_different_parameters.py +205 -0
  1699. wisent/tests/examples/cli/multi_steering/test_train_and_multi_steer.py +174 -0
  1700. wisent/tests/examples/cli/optimizer/__init__.py +0 -0
  1701. wisent/tests/examples/cli/optimizer/test_optimize_sample_size.py +102 -0
  1702. wisent/tests/examples/cli/optimizer/test_optimizer_examples.py +59 -0
  1703. wisent/tests/examples/cli/steering/__init__.py +0 -0
  1704. wisent/tests/examples/cli/steering/test_create_steering_vectors.py +135 -0
  1705. wisent/tests/examples/cli/synthetic/__init__.py +0 -0
  1706. wisent/tests/examples/cli/synthetic/test_synthetic_pairs.py +45 -0
  1707. wisent/tests/nosense/__init__.py +6 -0
  1708. wisent/tests/nosense/base_nosense.py +81 -0
  1709. wisent/tests/nosense/math500_nosense.py +72 -0
  1710. wisent/tests/nosense/test_robustness.py +336 -0
  1711. wisent/tests/test_all_cli_commands.py +674 -0
  1712. wisent/tests/test_geometry_comprehensive.py +327 -0
  1713. wisent/tests/test_titan_geometry.py +257 -0
  1714. wisent/tests/visualize_geometry.py +148 -0
  1715. wisent-0.7.379.dist-info/METADATA +64 -0
  1716. wisent-0.7.379.dist-info/RECORD +1720 -0
  1717. wisent-0.7.379.dist-info/WHEEL +5 -0
  1718. wisent-0.7.379.dist-info/entry_points.txt +2 -0
  1719. wisent-0.7.379.dist-info/licenses/LICENSE +21 -0
  1720. wisent-0.7.379.dist-info/top_level.txt +1 -0
@@ -0,0 +1,3421 @@
1
+ """Steering optimization command execution logic with full strategy optimization.
2
+
3
+ Results are persisted to ~/.wisent/configs/ via WisentConfigManager
4
+ so they can be automatically loaded on subsequent runs.
5
+
6
+ Supports two search strategies:
7
+ - grid: Exhaustive search over all configurations (thorough but slow)
8
+ - optuna: TPE sampling with early stopping (fast, but may miss the optimal configuration)
9
+ """
10
+
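# Hedged illustration (editor's sketch, not part of this module): why the two search
# strategies described above differ in cost. An exhaustive grid multiplies every
# dimension of the search space, while Optuna's TPE sampler spends a fixed trial
# budget. The dimension sizes below are hypothetical.
n_layers, n_strengths, n_strategies, n_aggregations, n_prompts = 8, 4, 3, 2, 2
grid_configs = n_layers * n_strengths * n_strategies * n_aggregations * n_prompts  # 384 evaluations
optuna_trials = 50  # fixed budget, usually far smaller than the full grid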
11
+ import json
12
+ import sys
13
+ import time
14
+
15
+ import numpy as np
16
+
17
+ from wisent.core.evaluators.rotator import EvaluatorRotator
18
+ from wisent.core.models.inference_config import get_generate_kwargs
19
+ from wisent.core.config_manager import (
20
+ get_config_manager,
21
+ save_steering_config,
22
+ get_steering_config,
23
+ get_cached_optimization,
24
+ store_optimization,
25
+ SteeringConfig,
26
+ )
27
+
28
+
29
+ def _run_optuna_search_for_task(
30
+ model,
31
+ train_pairs,
32
+ test_pairs,
33
+ evaluator,
34
+ task_name,
35
+ search_space,
36
+ args,
37
+ baseline_results=None,
38
+ ):
39
+ """
40
+ Run Optuna-based hyperparameter search for a single task.
41
+
42
+ Returns:
43
+ dict: Best configuration found with score and parameters
44
+ """
45
+ import optuna
46
+ from optuna.samplers import TPESampler
47
+ from optuna.pruners import MedianPruner
48
+
49
+ from wisent.core.activations.activations_collector import ActivationCollector
50
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
51
+ from wisent.core.models.core.atoms import SteeringPlan
52
+ from wisent.core.cli.steering_method_trainer import create_steering_method
53
+
54
+ n_trials = getattr(args, 'n_trials', 50)
55
+ n_startup_trials = getattr(args, 'n_startup_trials', 10)
56
+
57
+ # Maps for converting string values to enums
58
+ token_agg_map = {
59
+ "last_token": ActivationAggregationStrategy.LAST_TOKEN,
60
+ "mean_pooling": ActivationAggregationStrategy.MEAN_POOLING,
61
+ "first_token": ActivationAggregationStrategy.FIRST_TOKEN,
62
+ "max_pooling": ActivationAggregationStrategy.MAX_POOLING,
63
+ }
64
+
65
+ def objective(trial):
66
+ """Optuna objective function for steering optimization."""
67
+ # Sample hyperparameters
68
+ layer = trial.suggest_int("layer", min(search_space.layers), max(search_space.layers))
69
+ strength = trial.suggest_float("strength", min(search_space.strengths), max(search_space.strengths), log=True)
70
+ strategy = trial.suggest_categorical("strategy", search_space.strategies)
71
+ token_agg_name = trial.suggest_categorical("token_aggregation", search_space.token_aggregations)
72
+ token_agg = token_agg_map.get(token_agg_name, ActivationAggregationStrategy.LAST_TOKEN)
73
+
74
+ layer_str = str(layer)
75
+
76
+ try:
77
+ # Collect activations
78
+ collector = ActivationCollector(model=model, store_device="cpu")
79
+ pos_acts = []
80
+ neg_acts = []
81
+
82
+ for pair in train_pairs.pairs:
83
+ updated_pair = collector.collect_for_pair(
84
+ pair,
85
+ layers=[layer_str],
86
+ aggregation=token_agg,
87
+ return_full_sequence=False,
88
+ normalize_layers=False,
89
+ )
90
+
91
+ if (updated_pair.positive_response.layers_activations
92
+ and layer_str in updated_pair.positive_response.layers_activations):
93
+ act = updated_pair.positive_response.layers_activations[layer_str]
94
+ if act is not None:
95
+ pos_acts.append(act)
96
+
97
+ if (updated_pair.negative_response.layers_activations
98
+ and layer_str in updated_pair.negative_response.layers_activations):
99
+ act = updated_pair.negative_response.layers_activations[layer_str]
100
+ if act is not None:
101
+ neg_acts.append(act)
102
+
103
+ if len(pos_acts) == 0 or len(neg_acts) == 0:
104
+ return 0.0
105
+
106
+ # Train steering vector
107
+ method_name = args.methods[0] if args.methods else "CAA"
108
+ steering_method = create_steering_method(method_name, args)
109
+ import torch
110
+ pos_tensor = torch.stack(pos_acts).mean(dim=0)
111
+ neg_tensor = torch.stack(neg_acts).mean(dim=0)
112
+ steering_vector = steering_method.train_for_layer(pos_tensor, neg_tensor)
113
+
114
+ # Create steering plan
115
+ steering_plan = SteeringPlan.from_raw(
116
+ raw={layer_str: steering_vector},
117
+ scale=strength,
118
+ normalize=False
119
+ )
120
+
121
+ # Evaluate on test set
122
+ correct = 0
123
+ total = 0
124
+
125
+ for pair in test_pairs.pairs:
126
+ try:
127
+ choices = [pair.negative_response.model_response, pair.positive_response.model_response]
128
+ expected = pair.positive_response.model_response
129
+ test_code = pair.metadata.get("test_code") if pair.metadata else None
130
+
131
+ eval_result = evaluator.evaluate(
132
+ response="",
133
+ expected=expected,
134
+ model=model,
135
+ question=pair.prompt,
136
+ choices=choices,
137
+ steering_plan=steering_plan,
138
+ test_code=test_code,
139
+ task_name=task_name,
140
+ )
141
+
142
+ if eval_result.ground_truth == "TRUTHFUL":
143
+ correct += 1
144
+ total += 1
145
+ except Exception:
146
+ total += 1
147
+
148
+ accuracy = correct / total if total > 0 else 0.0
149
+ return accuracy
150
+
151
+ except Exception as e:
152
+ print(f" Trial {trial.number} failed: {e}")
153
+ return 0.0
154
+
155
+ # Create and run study
156
+ sampler = TPESampler(seed=42, n_startup_trials=n_startup_trials)
157
+ pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=3)
158
+
159
+ study = optuna.create_study(
160
+ direction="maximize",
161
+ sampler=sampler,
162
+ pruner=pruner,
163
+ )
164
+
165
+ print(f" šŸ” Running Optuna optimization ({n_trials} trials)...")
166
+
167
+ # Suppress Optuna logs for cleaner output
168
+ optuna.logging.set_verbosity(optuna.logging.WARNING)
169
+
170
+ study.optimize(objective, n_trials=n_trials, show_progress_bar=True)
171
+
172
+ best_trial = study.best_trial
173
+
174
+ return {
175
+ "best_score": best_trial.value,
176
+ "best_layer": best_trial.params["layer"],
177
+ "best_strength": best_trial.params["strength"],
178
+ "best_strategy": best_trial.params["strategy"],
179
+ "best_token_aggregation": best_trial.params["token_aggregation"],
180
+ "n_trials": len(study.trials),
181
+ "search_strategy": "optuna",
182
+ }
183
+
184
+
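# Hedged, self-contained sketch (editor's illustration, not part of this module) of the
# Optuna pattern used above: a TPE sampler plus a median pruner maximizing a scalar
# score. The toy objective below stands in for the steering evaluation and is an
# assumption made only for this example.
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner

def _toy_objective(trial):
    layer = trial.suggest_int("layer", 8, 24)
    strength = trial.suggest_float("strength", 0.1, 4.0, log=True)
    # Pretend accuracy peaks at layer 16 and strength 1.0 (purely illustrative).
    return 1.0 / (1.0 + abs(layer - 16) + abs(strength - 1.0))

_study = optuna.create_study(
    direction="maximize",
    sampler=TPESampler(seed=42, n_startup_trials=10),
    pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=3),
)
_study.optimize(_toy_objective, n_trials=25)
print(_study.best_trial.params, _study.best_trial.value)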
185
+ def execute_optimize_steering(args):
186
+ """
187
+ Execute the optimize-steering command.
188
+
189
+ Supports multiple subcommands:
190
+ - comprehensive: Run comprehensive steering optimization
191
+ - compare-methods: Compare different steering methods
192
+ - optimize-layer: Find optimal steering layer
193
+ - optimize-strength: Find optimal steering strength
194
+ - auto: Automatically optimize based on classification config
195
+ """
196
+ from wisent.core.data_loaders.loaders.lm_loader import LMEvalDataLoader
197
+ from wisent.core.models.wisent_model import WisentModel
198
+
199
+ # Check which subcommand was called
200
+ if not hasattr(args, "steering_action") or args.steering_action is None:
201
+ print("\nāœ— No steering optimization action specified")
202
+ print("Available actions: comprehensive, compare-methods, optimize-layer, optimize-strength, auto")
203
+ sys.exit(1)
204
+
205
+ print(f"\n{'=' * 80}")
206
+ print(f"šŸŽÆ STEERING PARAMETER OPTIMIZATION: {args.steering_action.upper()}")
207
+ print(f"{'=' * 80}")
208
+ print(f" Model: {args.model}")
209
+ print(f" Device: {args.device or 'auto'}")
210
+ print(f"{'=' * 80}\n")
211
+
212
+ # Load model
213
+ print("šŸ“¦ Loading model...")
214
+ model = WisentModel(args.model, device=args.device)
215
+ print(f" āœ“ Model loaded with {model.num_layers} layers\n")
216
+
217
+ # Initialize data loader
218
+ loader = LMEvalDataLoader()
219
+
220
+ # Execute based on subcommand and return results
221
+ if args.steering_action == "comprehensive":
222
+ return execute_comprehensive(args, model, loader)
223
+ if args.steering_action == "compare-methods":
224
+ return execute_compare_methods(args, model, loader)
225
+ if args.steering_action == "optimize-layer":
226
+ return execute_optimize_layer(args, model, loader)
227
+ if args.steering_action == "optimize-strength":
228
+ return execute_optimize_strength(args, model, loader)
229
+ if args.steering_action == "auto":
230
+ return execute_auto(args, model, loader)
231
+ if args.steering_action == "personalization":
232
+ return execute_personalization(args, model)
233
+ if args.steering_action == "multi-personalization":
234
+ return execute_multi_personalization(args, model)
235
+ print(f"\nāœ— Unknown steering action: {args.steering_action}")
236
+ sys.exit(1)
237
+
238
+
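# Hedged sketch (editor's illustration, not part of this module): the persistence
# round-trip used by the subcommands below. The call shapes mirror the usages later in
# this file; the model/task names, values, and the returned fields (layer, strength,
# score) are assumptions inferred from that usage.
def _persistence_roundtrip_example():
    store_optimization(
        model="my-model", task="arc_easy", layer=16, strength=1.0,
        method="CAA", strategy="constant", score=0.71, metric="accuracy",
    )
    cached = get_cached_optimization("my-model", "arc_easy", "CAA")
    if cached is not None:
        print(cached.layer, cached.strength, cached.score)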
239
+ def execute_comprehensive(args, model, loader):
240
+ """Execute comprehensive steering optimization with generation-based evaluation."""
241
+ import torch
242
+
243
+ from wisent.core.activations.activations_collector import ActivationCollector
244
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
245
+ from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
246
+ from wisent.core.models.core.atoms import SteeringPlan
247
+ from wisent.core.cli.steering_method_trainer import create_steering_method
248
+ from wisent.core.cli.steering_search_space import (
249
+ get_search_space_from_args,
250
+ print_search_space_summary,
251
+ CAASearchSpace,
252
+ PRISMSearchSpace,
253
+ PULSESearchSpace,
254
+ TITANSearchSpace,
255
+ )
256
+
257
+ print("šŸ” Running comprehensive steering optimization...")
258
+ print(" Optimizing: Method-specific search space (layer, strength, strategy, + method params)")
259
+
260
+ # Determine tasks to optimize
261
+ if args.tasks:
262
+ task_list = args.tasks
263
+ else:
264
+ task_list = ["arc_easy", "hellaswag", "winogrande", "gsm8k"]
265
+
266
+ # Check for cached results if --use-cached is specified
267
+ use_cached = getattr(args, "use_cached", False)
268
+ save_as_default = getattr(args, "save_as_default", False)
269
+
270
+ if use_cached:
271
+ print("\nšŸ“¦ Checking optimization cache...")
272
+ cached_results = {}
273
+ tasks_to_run = []
274
+
275
+ for task_name in task_list:
276
+ for method in args.methods:
277
+ cached = get_cached_optimization(args.model, task_name, method)
278
+ if cached:
279
+ print(
280
+ f" āœ“ Found cached result for {task_name}/{method}: layer={cached.layer}, strength={cached.strength}, score={cached.score:.3f}"
281
+ )
282
+ cached_results[f"{task_name}::{method}"] = cached
283
+ else:
284
+ if task_name not in tasks_to_run:
285
+ tasks_to_run.append(task_name)
286
+
287
+ if cached_results and not tasks_to_run:
288
+ print("\nāœ… All tasks have cached results. Returning cached configurations.")
289
+ # Convert cached results to the expected return format
290
+ all_results = {}
291
+ for key, cached in cached_results.items():
292
+ task_name, method = key.split("::")
293
+ if task_name not in all_results:
294
+ all_results[task_name] = {}
295
+ all_results[task_name][method] = {
296
+ "best_layer": cached.layer,
297
+ "best_strength": cached.strength,
298
+ "best_score": cached.score,
299
+ "token_aggregation": cached.token_aggregation,
300
+ "prompt_strategy": cached.prompt_strategy,
301
+ "from_cache": True,
302
+ }
303
+ return all_results
304
+
305
+ if tasks_to_run:
306
+ print(f" Tasks needing optimization: {', '.join(tasks_to_run)}")
307
+ task_list = tasks_to_run
308
+
309
+ print(f" Tasks: {', '.join(task_list)}")
310
+ print(f" Methods: {', '.join(args.methods)}")
311
+ print(f" Limit: {args.limit} samples per task")
312
+ quick_search = getattr(args, 'quick_search', False)
313
+ print(f" Quick search: {quick_search}")
314
+
315
+ # Search strategy
316
+ search_strategy = getattr(args, 'search_strategy', 'grid')
317
+ n_trials = getattr(args, 'n_trials', 50)
318
+ print(f" Search strategy: {search_strategy}" + (f" ({n_trials} trials)" if search_strategy == "optuna" else ""))
319
+ print(" Time limit: DISABLED (no time limit)\n")
320
+
321
+ all_results = {}
322
+
323
+ # Get search spaces for each method and print summary
324
+ method_search_spaces = {}
325
+ total_all_methods = 0
326
+ for method_name in args.methods:
327
+ search_space = get_search_space_from_args(method_name, args, model.num_layers)
328
+ method_search_spaces[method_name] = search_space
329
+ print_search_space_summary(search_space, method_name)
330
+ total_all_methods += search_space.get_total_configs()
331
+
332
+ print(f"\n Total configurations across all methods: {total_all_methods:,}\n")
333
+
334
+ # For backward compatibility, also set up the legacy variables
335
+ # These are used by some code paths that haven't been fully migrated
336
+ first_method = args.methods[0] if args.methods else "CAA"
337
+ first_space = method_search_spaces.get(first_method)
338
+ if isinstance(first_space, (CAASearchSpace, PRISMSearchSpace)):
339
+ layers_to_test = first_space.layers
340
+ else:
341
+ # PULSE/TITAN don't use direct layers, compute defaults
342
+ layers_to_test = list(range(model.num_layers // 2, model.num_layers - 2, 2))
343
+
344
+ strengths_to_test = first_space.strengths if first_space else [0.5, 1.0, 1.5, 2.0]
345
+ strategies_to_test = first_space.strategies if first_space else ["constant", "initial_only", "diminishing"]
346
+
347
+ # Convert string token aggregations to enum
348
+ token_agg_map = {
349
+ "last_token": ActivationAggregationStrategy.LAST_TOKEN,
350
+ "mean_pooling": ActivationAggregationStrategy.MEAN_POOLING,
351
+ "first_token": ActivationAggregationStrategy.FIRST_TOKEN,
352
+ "max_pooling": ActivationAggregationStrategy.MAX_POOLING,
353
+ "choice_token": ActivationAggregationStrategy.CHOICE_TOKEN,
354
+ "continuation_token": ActivationAggregationStrategy.CONTINUATION_TOKEN,
355
+ }
356
+ token_aggregations_to_test = [
357
+ token_agg_map.get(t, ActivationAggregationStrategy.LAST_TOKEN)
358
+ for t in (first_space.token_aggregations if first_space else ["last_token", "mean_pooling"])
359
+ ]
360
+
361
+ # Convert string prompt constructions to enum
362
+ prompt_const_map = {
363
+ "chat_template": PromptConstructionStrategy.CHAT_TEMPLATE,
364
+ "direct_completion": PromptConstructionStrategy.DIRECT_COMPLETION,
365
+ "multiple_choice": PromptConstructionStrategy.MULTIPLE_CHOICE,
366
+ "role_playing": PromptConstructionStrategy.ROLE_PLAYING,
367
+ "instruction_following": PromptConstructionStrategy.INSTRUCTION_FOLLOWING,
368
+ }
369
+ prompt_constructions_to_test = [
370
+ prompt_const_map.get(p, PromptConstructionStrategy.CHAT_TEMPLATE)
371
+ for p in (first_space.prompt_constructions if first_space else ["chat_template", "direct_completion"])
372
+ ]
373
+
374
+ # For legacy code paths
375
+ total_configs = first_space.get_total_configs() if first_space else 100
376
+
377
+ for task_idx, task_name in enumerate(task_list, 1):
378
+ print(f"\n{'=' * 80}")
379
+ print(f"Task {task_idx}/{len(task_list)}: {task_name}")
380
+ print(f"{'=' * 80}")
381
+
382
+ task_start_time = time.time()
383
+
384
+ try:
385
+ # Load task data
386
+ print(" šŸ“Š Loading task data...")
387
+ result = loader._load_one_task(
388
+ task_name=task_name, split_ratio=0.8, seed=42, limit=args.limit, training_limit=None, testing_limit=None
389
+ )
390
+
391
+ train_pairs = result["train_qa_pairs"]
392
+ test_pairs = result["test_qa_pairs"]
393
+
394
+ print(f" āœ“ Loaded {len(train_pairs.pairs)} train, {len(test_pairs.pairs)} test pairs")
395
+
396
+ # Initialize evaluator for this task (auto-select based on task_name)
397
+ EvaluatorRotator.discover_evaluators("wisent.core.evaluators.benchmark_specific")
398
+ evaluator = EvaluatorRotator(evaluator=None, task_name=task_name) # None = auto-select
399
+ print(f" āœ“ Using evaluator: {evaluator._plugin.name} (auto-selected for {task_name})")
400
+
401
+ # Compute baseline (unsteered) results if requested
402
+ baseline_results = {}
403
+ if hasattr(args, "compute_baseline") and args.compute_baseline:
404
+ print("\n šŸ“Š Computing BASELINE (unsteered) accuracy...")
405
+ baseline_scores = []
406
+ baseline_per_problem = []
407
+
408
+ for pair_idx, pair in enumerate(test_pairs.pairs):
409
+ try:
410
+ # Prepare choices for multiple choice evaluation
411
+ choices = [pair.negative_response.model_response, pair.positive_response.model_response]
412
+ expected = pair.positive_response.model_response
413
+
414
+ # Evaluate WITHOUT steering
415
+ test_code = pair.metadata.get("test_code") if pair.metadata else None
416
+ eval_result = evaluator.evaluate(
417
+ response="",
418
+ expected=expected,
419
+ model=model,
420
+ question=pair.prompt,
421
+ choices=choices,
422
+ steering_plan=None, # No steering for baseline
423
+ test_code=test_code,
424
+ task_name=task_name,
425
+ )
426
+
427
+ is_correct = eval_result.ground_truth == "TRUTHFUL"
428
+ baseline_scores.append(1.0 if is_correct else 0.0)
429
+
430
+ # Store per-problem baseline result with details
431
+ baseline_per_problem.append(
432
+ {
433
+ "pair_index": pair_idx,
434
+ "prompt": pair.prompt,
435
+ "expected": expected,
436
+ "baseline_correct": is_correct,
437
+ "ground_truth": eval_result.ground_truth,
438
+ "method_used": eval_result.method_used,
439
+ "confidence": eval_result.confidence,
440
+ }
441
+ )
442
+
443
+ if (pair_idx + 1) % 10 == 0:
444
+ print(
445
+ f" Evaluated {pair_idx + 1}/{len(test_pairs.pairs)} baseline samples...", end="\r"
446
+ )
447
+
448
+ except Exception as e:
449
+ print(f"\nāŒ Baseline evaluation failed for pair {pair_idx}:")
450
+ print(f" Error: {e}")
451
+ raise
452
+
453
+ baseline_accuracy = np.mean(baseline_scores) if baseline_scores else 0.0
454
+ print(
455
+ f"\n āœ“ Baseline accuracy: {baseline_accuracy:.3f} ({sum(baseline_scores):.0f}/{len(baseline_scores)} correct)"
456
+ )
457
+
458
+ baseline_results = {
459
+ "accuracy": baseline_accuracy,
460
+ "per_problem": baseline_per_problem,
461
+ "num_correct": int(sum(baseline_scores)),
462
+ "num_total": len(baseline_scores),
463
+ }
464
+
465
+ # Dispatch based on search strategy
466
+ if search_strategy == "optuna":
467
+ # Use Optuna-based search
468
+ optuna_result = _run_optuna_search_for_task(
469
+ model=model,
470
+ train_pairs=train_pairs,
471
+ test_pairs=test_pairs,
472
+ evaluator=evaluator,
473
+ task_name=task_name,
474
+ search_space=first_space,
475
+ args=args,
476
+ baseline_results=baseline_results,
477
+ )
478
+
479
+ best_score = optuna_result["best_score"]
480
+ best_config = {
481
+ "layer": optuna_result["best_layer"],
482
+ "strength": optuna_result["best_strength"],
483
+ "strategy": optuna_result["best_strategy"],
484
+ "token_aggregation": optuna_result["best_token_aggregation"],
485
+ }
486
+
487
+ print(f" Best: layer={best_config['layer']}, strength={best_config['strength']:.2f}, "
488
+ f"strategy={best_config['strategy']}, token_agg={best_config['token_aggregation']}")
489
+ print(f" Score: {best_score:.4f} (from {optuna_result['n_trials']} trials)")
490
+
491
+ # Store results in format compatible with grid search
492
+ method_results = {
493
+ first_method: {
494
+ "best_score": best_score,
495
+ "best_layer": best_config["layer"],
496
+ "best_strength": best_config["strength"],
497
+ "best_strategy": best_config["strategy"],
498
+ "token_aggregation": best_config["token_aggregation"],
499
+ "search_strategy": "optuna",
500
+ }
501
+ }
502
+
503
+ # Skip the grid search loop - jump to result saving
504
+ all_results[task_name] = method_results
505
+
506
+ if not args.no_save:
507
+ save_steering_config(
508
+ model_name=args.model,
509
+ task=task_name,
510
+ layer=best_config["layer"],
511
+ strength=best_config["strength"],
512
+ method=first_method,
513
+ strategy=best_config["strategy"],
514
+ token_aggregation=best_config["token_aggregation"],
515
+ )
516
+ store_optimization(
517
+ model=args.model,
518
+ task=task_name,
519
+ layer=best_config["layer"],
520
+ strength=best_config["strength"],
521
+ method=first_method,
522
+ strategy=best_config["strategy"],
523
+ score=best_score,
524
+ metric="accuracy",
525
+ )
526
+
527
+ continue # Skip to next task
528
+
529
+ # Grid search (original behavior)
530
+ print(
531
+ "\n šŸ” Testing CAA method across layers, strengths, strategies, token aggregations, prompt constructions..."
532
+ )
533
+ print(f" Total configurations: {total_configs}")
534
+
535
+ best_score = 0
536
+ best_config = None
537
+ method_results = {}
538
+ configs_tested = 0
539
+ all_generation_examples = [] # Store generation examples for all configs
540
+
541
+ # Prepare test prompts if generating examples for all configs
542
+ if args.save_all_generation_examples or args.save_generation_examples:
543
+ num_examples = min(args.num_generation_examples, len(test_pairs.pairs))
544
+ example_pairs = test_pairs.pairs[:num_examples]
545
+ print(f" šŸ“ Will generate {num_examples} example responses per configuration")
546
+
547
+ for layer in layers_to_test:
548
+ for strength in strengths_to_test:
549
+ for strategy in strategies_to_test:
550
+ for token_agg in token_aggregations_to_test:
551
+ for prompt_const in prompt_constructions_to_test:
552
+ # Time limit disabled - run all configurations
553
+
554
+ try:
555
+ configs_tested += 1
556
+ layer_str = str(layer)
557
+
558
+ # Step 1: Generate steering vector using CAA with current token aggregation
559
+ collector = ActivationCollector(model=model, store_device="cpu")
560
+
561
+ pos_acts = []
562
+ neg_acts = []
563
+
564
+ for pair in train_pairs.pairs:
565
+ updated_pair = collector.collect_for_pair(
566
+ pair,
567
+ layers=[layer_str],
568
+ aggregation=token_agg, # Use current token aggregation strategy
569
+ return_full_sequence=False,
570
+ normalize_layers=False,
571
+ )
572
+
573
+ if (
574
+ updated_pair.positive_response.layers_activations
575
+ and layer_str in updated_pair.positive_response.layers_activations
576
+ ):
577
+ act = updated_pair.positive_response.layers_activations[layer_str]
578
+ if act is not None:
579
+ pos_acts.append(act)
580
+
581
+ if (
582
+ updated_pair.negative_response.layers_activations
583
+ and layer_str in updated_pair.negative_response.layers_activations
584
+ ):
585
+ act = updated_pair.negative_response.layers_activations[layer_str]
586
+ if act is not None:
587
+ neg_acts.append(act)
588
+
589
+ if len(pos_acts) == 0 or len(neg_acts) == 0:
590
+ continue
591
+
592
+ # Create CAA steering vector
593
+ # Use the selected method (first from args.methods or default to CAA)
594
+ method_name = args.methods[0] if args.methods else "CAA"
595
+ steering_method = create_steering_method(method_name, args)
596
+ steering_vector = steering_method.train_for_layer(pos_acts, neg_acts)
597
+
598
+ # Step 2: Evaluate with ACTUAL GENERATION and task evaluator
599
+ # Create steering plan
600
+ from wisent.core.models.core.atoms import SteeringPlan, SteeringVector
601
+
602
+ steering_vec = SteeringVector(vector=steering_vector, scale=strength)
603
+ steering_plan = SteeringPlan(
604
+ layers={layer_str: steering_vec},
605
+ layers_description=[
606
+ f"CAA L{layer} S{strength} {strategy} T:{token_agg.value} P:{prompt_const.value}"
607
+ ],
608
+ )
609
+
610
+ # Apply steering to model
611
+ model.apply_steering(steering_plan)
612
+
613
+ test_scores = []
614
+ detailed_results = [] # Store full evaluation details
615
+ delta_tracking = [] # Track improved/regressed/unchanged per problem
616
+
617
+ for pair_idx, pair in enumerate(test_pairs.pairs):
618
+ try:
619
+ # Prepare choices for multiple choice evaluation
620
+ # ContrastivePair uses: prompt, positive_response.model_response, negative_response.model_response
621
+ choices = [
622
+ pair.negative_response.model_response,
623
+ pair.positive_response.model_response,
624
+ ]
625
+ expected = pair.positive_response.model_response
626
+
627
+ # Use the Wisent evaluator to check correctness
628
+ # The evaluator will use log likelihood if possible,
629
+ # otherwise fall back to generation
630
+ # Pass test_code from metadata for coding tasks
631
+ test_code = pair.metadata.get("test_code") if pair.metadata else None
632
+ eval_result = evaluator.evaluate(
633
+ response="", # Not used for log likelihood eval
634
+ expected=expected,
635
+ model=model,
636
+ question=pair.prompt,
637
+ choices=choices,
638
+ steering_plan=steering_plan,
639
+ test_code=test_code,
640
+ task_name=task_name,
641
+ )
642
+
643
+ # Convert TRUTHFUL/UNTRUTHFUL to 1.0/0.0
644
+ is_correct = eval_result.ground_truth == "TRUTHFUL"
645
+ test_scores.append(1.0 if is_correct else 0.0)
646
+
647
+ # Save full evaluation details
648
+ detailed_results.append(
649
+ {
650
+ "prompt": pair.prompt,
651
+ "choices": choices,
652
+ "expected": expected,
653
+ "ground_truth": eval_result.ground_truth,
654
+ "method_used": eval_result.method_used,
655
+ "confidence": eval_result.confidence,
656
+ "details": eval_result.details,
657
+ "meta": dict(eval_result.meta) if eval_result.meta else {},
658
+ "is_correct": is_correct,
659
+ }
660
+ )
661
+
662
+ # Track delta if baseline was computed
663
+ if baseline_results and "per_problem" in baseline_results:
664
+ baseline_correct = baseline_results["per_problem"][pair_idx][
665
+ "baseline_correct"
666
+ ]
667
+ if not baseline_correct and is_correct:
668
+ delta_status = "improved"
669
+ elif baseline_correct and not is_correct:
670
+ delta_status = "regressed"
671
+ else:
672
+ delta_status = "unchanged"
673
+
674
+ delta_tracking.append(
675
+ {
676
+ "pair_index": pair_idx,
677
+ "prompt": pair.prompt,
678
+ "expected": expected,
679
+ "baseline_correct": baseline_correct,
680
+ "steered_correct": is_correct,
681
+ "delta_status": delta_status,
682
+ }
683
+ )
684
+
685
+ except Exception as e:
686
+ # NO FALLBACK - raise the error immediately
687
+ print("\nāŒ Evaluation failed for test pair:")
688
+ print(f" Prompt: {pair.prompt[:100]}")
689
+ print(f" Error: {e}")
690
+ raise
691
+
692
+ # Clear steering
693
+ model.clear_steering()
694
+
695
+ if len(test_scores) > 0:
696
+ avg_score = np.mean(test_scores)
697
+
698
+ # Generate examples for this configuration if requested
699
+ if args.save_all_generation_examples:
700
+ config_examples = []
701
+ # Get inference config settings
702
+ for idx, pair in enumerate(example_pairs):
703
+ prompt = pair.prompt
704
+ try:
705
+ # Generate without steering (only once per prompt, reuse if already generated)
706
+ unsteered_response = model.generate(
707
+ [[{"role": "user", "content": prompt}]],
708
+ **get_generate_kwargs(max_new_tokens=100),
709
+ use_steering=False,
710
+ )[0]
711
+
712
+ # Create steering plan for this config
713
+ from wisent.core.models.core.atoms import (
714
+ SteeringPlan,
715
+ SteeringVector,
716
+ )
717
+
718
+ steering_vec = SteeringVector(
719
+ vector=steering_vector, scale=strength
720
+ )
721
+ steering_plan = SteeringPlan(
722
+ layers={layer_str: steering_vec},
723
+ layers_description=[
724
+ f"CAA steering layer={layer}, strength={strength}, strategy={strategy}"
725
+ ],
726
+ )
727
+
728
+ # Generate with steering
729
+ model.apply_steering(steering_plan)
730
+ steered_response = model.generate(
731
+ [[{"role": "user", "content": prompt}]],
732
+ **get_generate_kwargs(max_new_tokens=100),
733
+ use_steering=True,
734
+ steering_plan=steering_plan,
735
+ )[0]
736
+ model.clear_steering()
737
+
738
+ config_examples.append(
739
+ {
740
+ "prompt": prompt,
741
+ "correct_answer": pair.positive_response.model_response,
742
+ "incorrect_answer": pair.negative_response.model_response,
743
+ "unsteered_generation": unsteered_response,
744
+ "steered_generation": steered_response,
745
+ }
746
+ )
747
+ except Exception as e:
748
+ if args.verbose:
749
+ print(
750
+ f" āš ļø Failed to generate example for config layer={layer}, strength={strength}, strategy={strategy}: {e}"
751
+ )
752
+
753
+ # Store this config's examples
754
+ all_generation_examples.append(
755
+ {
756
+ "layer": layer,
757
+ "strength": strength,
758
+ "strategy": strategy,
759
+ "accuracy": avg_score,
760
+ "examples": config_examples,
761
+ }
762
+ )
763
+
764
+ # Compute delta summary if baseline was computed
765
+ delta_summary = {}
766
+ if delta_tracking:
767
+ improved = sum(1 for d in delta_tracking if d["delta_status"] == "improved")
768
+ regressed = sum(
769
+ 1 for d in delta_tracking if d["delta_status"] == "regressed"
770
+ )
771
+ unchanged = sum(
772
+ 1 for d in delta_tracking if d["delta_status"] == "unchanged"
773
+ )
774
+ delta_summary = {
775
+ "improved": improved,
776
+ "regressed": regressed,
777
+ "unchanged": unchanged,
778
+ "net_change": improved - regressed,
779
+ }
780
+
781
+ # Store detailed results for this configuration
782
+ config_key = (
783
+ f"L{layer}_S{strength}_{strategy}_{token_agg.value}_{prompt_const.value}"
784
+ )
785
+ method_results[config_key] = {
786
+ "layer": layer,
787
+ "strength": strength,
788
+ "strategy": strategy,
789
+ "token_aggregation": token_agg.value,
790
+ "prompt_construction": prompt_const.value,
791
+ "accuracy": avg_score,
792
+ "num_test_samples": len(test_scores),
793
+ "detailed_results": detailed_results, # Save all eval details
794
+ "delta_tracking": delta_tracking if delta_tracking else None,
795
+ "delta_summary": delta_summary if delta_summary else None,
796
+ }
797
+
798
+ if avg_score > best_score:
799
+ best_score = avg_score
800
+ best_config = {
801
+ "layer": layer,
802
+ "strength": strength,
803
+ "strategy": strategy,
804
+ "token_aggregation": token_agg.value,
805
+ "prompt_construction": prompt_const.value,
806
+ "accuracy": avg_score,
807
+ }
808
+
809
+ if configs_tested % 10 == 0 and args.verbose:
810
+ print(f" Tested {configs_tested} configurations...", end="\r")
811
+
812
+ except Exception as e:
813
+ # NO FALLBACK - raise the error immediately
814
+ print("\nāŒ Configuration test failed:")
815
+ print(f" Layer: {layer}")
816
+ print(f" Strength: {strength}")
817
+ print(f" Strategy: {strategy}")
818
+ print(f" Error: {e}")
819
+ raise
820
+
821
+ if best_config:
822
+ print("\n āœ… Best configuration found:")
823
+ print(" Method: CAA")
824
+ print(f" Layer: {best_config['layer']}")
825
+ print(f" Strength: {best_config['strength']}")
826
+ print(f" Strategy: {best_config['strategy']} ⭐")
827
+ print(f" Token Aggregation: {best_config['token_aggregation']}")
828
+ print(f" Prompt Construction: {best_config['prompt_construction']}")
829
+ print(f" Accuracy: {best_config['accuracy']:.3f}")
830
+
831
+ method_results["CAA"] = {
832
+ "optimal_layer": best_config["layer"],
833
+ "optimal_strength": best_config["strength"],
834
+ "optimal_strategy": best_config["strategy"],
835
+ "optimal_token_aggregation": best_config["token_aggregation"],
836
+ "optimal_prompt_construction": best_config["prompt_construction"],
837
+ "accuracy": best_config["accuracy"],
838
+ "f1": best_config["accuracy"],
839
+ }
840
+
841
+ # Save baseline comparison results if computed
842
+ if hasattr(args, "compute_baseline") and args.compute_baseline and baseline_results:
843
+ import os
844
+
845
+ baseline_dir = (
846
+ args.baseline_output_dir if hasattr(args, "baseline_output_dir") else "./baseline_comparison"
847
+ )
848
+ os.makedirs(baseline_dir, exist_ok=True)
849
+
850
+ # Get delta tracking for best config
851
+ best_config_key = f"L{best_config['layer']}_S{best_config['strength']}_{best_config['strategy']}_{best_config['token_aggregation']}_{best_config['prompt_construction']}"
852
+ best_config_results = method_results.get(best_config_key, {})
853
+ best_delta_tracking = best_config_results.get("delta_tracking", [])
854
+ best_delta_summary = best_config_results.get("delta_summary", {})
855
+
856
+ # Separate improved, regressed, unchanged for inspection
857
+ improved_examples = [d for d in best_delta_tracking if d.get("delta_status") == "improved"]
858
+ regressed_examples = [d for d in best_delta_tracking if d.get("delta_status") == "regressed"]
859
+ unchanged_examples = [d for d in best_delta_tracking if d.get("delta_status") == "unchanged"]
860
+
861
+ baseline_comparison_data = {
862
+ "task": task_name,
863
+ "model": args.model,
864
+ "baseline_accuracy": baseline_results["accuracy"],
865
+ "best_steered_accuracy": best_config["accuracy"],
866
+ "delta": best_config["accuracy"] - baseline_results["accuracy"],
867
+ "best_config": best_config,
868
+ "summary": best_delta_summary,
869
+ "improved_examples": improved_examples,
870
+ "regressed_examples": regressed_examples,
871
+ "unchanged_examples": unchanged_examples,
872
+ "baseline_per_problem": baseline_results["per_problem"],
873
+ }
874
+
875
+ comparison_path = os.path.join(baseline_dir, f"{task_name}_baseline_comparison.json")
876
+ with open(comparison_path, "w") as f:
877
+ json.dump(baseline_comparison_data, f, indent=2)
878
+
879
+ print("\n šŸ“Š Baseline Comparison Summary:")
880
+ print(f" Baseline (unsteered) accuracy: {baseline_results['accuracy']:.3f}")
881
+ print(f" Best steered accuracy: {best_config['accuracy']:.3f}")
882
+ print(f" Delta: {(best_config['accuracy'] - baseline_results['accuracy']) * 100:+.1f}%")
883
+ if best_delta_summary:
884
+ print(f" Improved: {best_delta_summary.get('improved', 0)} problems")
885
+ print(f" Regressed: {best_delta_summary.get('regressed', 0)} problems")
886
+ print(f" Unchanged: {best_delta_summary.get('unchanged', 0)} problems")
887
+ print(f" Net change: {best_delta_summary.get('net_change', 0)} problems")
888
+ print(f" šŸ’¾ Saved comparison to: {comparison_path}")
889
+
890
+ # Save best steering vector if requested
891
+ if args.save_best_vector:
892
+ import os
893
+
894
+ vector_dir = args.save_best_vector
895
+ os.makedirs(vector_dir, exist_ok=True)
896
+
897
+ # Recreate the best steering vector with optimal token aggregation
898
+ best_layer_str = str(best_config["layer"])
899
+ best_token_agg = ActivationAggregationStrategy(best_config["token_aggregation"])
900
+ pos_acts_best = []
901
+ neg_acts_best = []
902
+
903
+ for pair in train_pairs.pairs:
904
+ updated_pair = collector.collect_for_pair(
905
+ pair,
906
+ layers=[best_layer_str],
907
+ aggregation=best_token_agg, # Use optimal token aggregation
908
+ return_full_sequence=False,
909
+ normalize_layers=False,
910
+ )
911
+
912
+ if (
913
+ updated_pair.positive_response.layers_activations
914
+ and best_layer_str in updated_pair.positive_response.layers_activations
915
+ ):
916
+ act = updated_pair.positive_response.layers_activations[best_layer_str]
917
+ if act is not None:
918
+ pos_acts_best.append(act)
919
+
920
+ if (
921
+ updated_pair.negative_response.layers_activations
922
+ and best_layer_str in updated_pair.negative_response.layers_activations
923
+ ):
924
+ act = updated_pair.negative_response.layers_activations[best_layer_str]
925
+ if act is not None:
926
+ neg_acts_best.append(act)
927
+
928
+ # Create and save steering vector
929
+ method_name = args.methods[0] if args.methods else "CAA"
930
+ steering_method = create_steering_method(method_name, args)
931
+ best_steering_vector = steering_method.train_for_layer(pos_acts_best, neg_acts_best)
932
+
933
+ vector_path = os.path.join(vector_dir, f"{task_name}_layer{best_config['layer']}.pt")
934
+ torch.save(
935
+ {
936
+ "steering_vector": best_steering_vector,
937
+ "vector": best_steering_vector, # Legacy key
938
+ "layer": best_config["layer"],
939
+ "layer_index": best_config["layer"], # Legacy key
940
+ "strength": best_config["strength"],
941
+ "strategy": best_config["strategy"],
942
+ "token_aggregation": best_config["token_aggregation"],
943
+ "prompt_construction": best_config["prompt_construction"],
944
+ "method": "CAA",
945
+ "task": task_name,
946
+ "model": args.model,
947
+ "accuracy": best_config["accuracy"],
948
+ },
949
+ vector_path,
950
+ )
951
+ print(f" šŸ’¾ Saved steering vector to: {vector_path}")
952
+
953
+ # Save generation examples
954
+ if args.save_all_generation_examples:
955
+ # Save examples for ALL configurations
956
+ examples_path = os.path.join(
957
+ args.save_best_vector if args.save_best_vector else "./optimization_results",
958
+ f"{task_name}_all_generation_examples.json",
959
+ )
960
+ os.makedirs(os.path.dirname(examples_path), exist_ok=True)
961
+
962
+ with open(examples_path, "w") as f:
963
+ json.dump(
964
+ {
965
+ "task": task_name,
966
+ "model": args.model,
967
+ "best_config": best_config,
968
+ "configurations": all_generation_examples,
969
+ },
970
+ f,
971
+ indent=2,
972
+ )
973
+
974
+ print(
975
+ f"\n šŸ’¾ Saved generation examples for {len(all_generation_examples)} configurations to: {examples_path}"
976
+ )
977
+
978
+ # Generate examples for --save-generation-examples, --show-comparisons, or --save-comparisons
979
+ show_comparisons = getattr(args, 'show_comparisons', 0)
980
+ save_comparisons = getattr(args, 'save_comparisons', None)
981
+ need_generation = args.save_generation_examples or show_comparisons > 0 or save_comparisons
982
+
983
+ if need_generation:
984
+ # Save examples only for the best configuration
985
+ print("\n šŸ“ Generating example responses for best configuration...")
986
+
987
+ # Get a few test examples to generate from
988
+ num_examples = min(args.num_generation_examples, len(test_pairs.pairs))
989
+ example_pairs = test_pairs.pairs[:num_examples]
990
+
991
+ generation_examples = []
992
+
993
995
+
996
+ for idx, pair in enumerate(example_pairs):
997
+ # Create prompt from the question
998
+ prompt = pair.prompt
999
+
1000
+ try:
1001
+ # Generate without steering
1002
+ unsteered_response = model.generate(
1003
+ [[{"role": "user", "content": prompt}]],
1004
+ **get_generate_kwargs(max_new_tokens=100),
1005
+ use_steering=False,
1006
+ )[0]
1007
+
1008
+ # Recreate best steering vector for generation
1009
+ best_layer_str = str(best_config["layer"])
1010
+ pos_acts_gen = []
1011
+ neg_acts_gen = []
1012
+
1013
+ # Collect activations again for steering
1014
+ for train_pair in train_pairs.pairs[:20]: # Use subset for speed
1015
+ updated_pair = collector.collect_for_pair(
1016
+ train_pair,
1017
+ layers=[best_layer_str],
1018
+ aggregation=ActivationAggregationStrategy.MEAN_POOLING,
1019
+ return_full_sequence=False,
1020
+ normalize_layers=False,
1021
+ )
1022
+
1023
+ if (
1024
+ updated_pair.positive_response.layers_activations
1025
+ and best_layer_str in updated_pair.positive_response.layers_activations
1026
+ ):
1027
+ act = updated_pair.positive_response.layers_activations[best_layer_str]
1028
+ if act is not None:
1029
+ pos_acts_gen.append(act)
1030
+
1031
+ if (
1032
+ updated_pair.negative_response.layers_activations
1033
+ and best_layer_str in updated_pair.negative_response.layers_activations
1034
+ ):
1035
+ act = updated_pair.negative_response.layers_activations[best_layer_str]
1036
+ if act is not None:
1037
+ neg_acts_gen.append(act)
1038
+
1039
+ # Create steering vector
1040
+ method_name_gen = args.methods[0] if args.methods else "CAA"
1041
+ steering_method_gen = create_steering_method(method_name_gen, args)
1042
+ steering_vector_gen = steering_method_gen.train_for_layer(pos_acts_gen, neg_acts_gen)
1043
+
1044
+ # Create SteeringPlan
1045
+ from wisent.core.models.core.atoms import SteeringPlan, SteeringVector
1046
+
1047
+ steering_vec = SteeringVector(vector=steering_vector_gen, scale=best_config["strength"])
1048
+ steering_plan = SteeringPlan(
1049
+ layers={best_layer_str: steering_vec},
1050
+ layers_description=[f"CAA steering for {task_name}"],
1051
+ )
1052
+
1053
+ # Generate with steering
1054
+ model.apply_steering(steering_plan)
1055
+ steered_response = model.generate(
1056
+ [[{"role": "user", "content": prompt}]],
1057
+ **get_generate_kwargs(max_new_tokens=100),
1058
+ use_steering=True,
1059
+ steering_plan=steering_plan,
1060
+ )[0]
1061
+ model.detach()
1062
+
1063
+ generation_examples.append(
1064
+ {
1065
+ "question": prompt,
1066
+ "correct_answer": pair.positive_response.model_response,
1067
+ "incorrect_answer": pair.negative_response.model_response,
1068
+ "unsteered_generation": unsteered_response,
1069
+ "steered_generation": steered_response,
1070
+ }
1071
+ )
1072
+
1073
+ print(f" Generated example {idx + 1}/{num_examples}")
1074
+
1075
+ except Exception as e:
1076
+ print(f" āš ļø Failed to generate example {idx + 1}: {e}")
1077
+ if args.verbose:
1078
+ import traceback
1079
+
1080
+ traceback.print_exc()
1081
+
1082
+ # Save examples to JSON only if --save-generation-examples is set
1083
+ if args.save_generation_examples:
1084
+ examples_path = os.path.join(
1085
+ args.save_best_vector if args.save_best_vector else "./optimization_results",
1086
+ f"{task_name}_generation_examples.json",
1087
+ )
1088
+ os.makedirs(os.path.dirname(examples_path), exist_ok=True)
1089
+
1090
+ with open(examples_path, "w") as f:
1091
+ json.dump(
1092
+ {
1093
+ "task": task_name,
1094
+ "model": args.model,
1095
+ "best_config": best_config,
1096
+ "examples": generation_examples,
1097
+ },
1098
+ f,
1099
+ indent=2,
1100
+ )
1101
+
1102
+ print(f" šŸ’¾ Saved {len(generation_examples)} generation examples to: {examples_path}")
1103
+
1104
+ # Handle --show-comparisons and --save-comparisons flags
1105
+ if (show_comparisons > 0 or save_comparisons) and generation_examples:
1106
+ # Build comparisons list from generation_examples
1107
+ comparisons = []
1108
+ for ex in generation_examples:
1109
+ comparisons.append({
1110
+ "prompt": ex["question"],
1111
+ "baseline_response": ex["unsteered_generation"],
1112
+ "optimized_response": ex["steered_generation"],
1113
+ "correct_answer": ex.get("correct_answer", ""),
1114
+ "incorrect_answer": ex.get("incorrect_answer", ""),
1115
+ })
1116
+
1117
+ # Save to JSON if requested
1118
+ if save_comparisons:
1119
+ os.makedirs(os.path.dirname(save_comparisons) if os.path.dirname(save_comparisons) else ".", exist_ok=True)
1120
+ with open(save_comparisons, "w") as f:
1121
+ json.dump({
1122
+ "model": args.model,
1123
+ "task": task_name,
1124
+ "best_config": best_config,
1125
+ "comparisons": comparisons,
1126
+ }, f, indent=2)
1127
+ print(f" šŸ’¾ Saved comparisons to: {save_comparisons}")
1128
+
1129
+ # Display in console if requested
1130
+ if show_comparisons > 0:
1131
+ print(f"\n šŸ“Š Top {min(show_comparisons, len(comparisons))} Baseline vs Optimized Comparisons:\n")
1132
+ for i, comp in enumerate(comparisons[:show_comparisons]):
1133
+ print(f"{'─'*80}")
1134
+ print(f"Comparison {i+1}/{min(show_comparisons, len(comparisons))}")
1135
+ print(f"{'─'*80}")
1136
+ print(f"PROMPT: {comp['prompt'][:200]}{'...' if len(comp['prompt']) > 200 else ''}")
1137
+ print()
1138
+ print(f"BASELINE (unsteered):")
1139
+ print(f" {comp['baseline_response'][:300]}{'...' if len(comp['baseline_response']) > 300 else ''}")
1140
+ print()
1141
+ print(f"OPTIMIZED (steered):")
1142
+ print(f" {comp['optimized_response'][:300]}{'...' if len(comp['optimized_response']) > 300 else ''}")
1143
+ print()
1144
+
1145
+ else:
1146
+ print("\n āš ļø No valid configuration found")
1147
+ method_results["CAA"] = {
1148
+ "optimal_layer": 8,
1149
+ "optimal_strength": 1.0,
1150
+ "optimal_strategy": "constant",
1151
+ "optimal_token_aggregation": "last_token",
1152
+ "optimal_prompt_construction": "chat_template",
1153
+ "accuracy": 0.5,
1154
+ "f1": 0.5,
1155
+ }
1156
+
1157
+ all_results[task_name] = {
1158
+ "methods": method_results,
1159
+ "best_method": "CAA",
1160
+ "best_layer": method_results["CAA"]["optimal_layer"],
1161
+ "best_strength": method_results["CAA"]["optimal_strength"],
1162
+ "best_strategy": method_results["CAA"]["optimal_strategy"],
1163
+ "best_token_aggregation": method_results["CAA"]["optimal_token_aggregation"],
1164
+ "best_prompt_construction": method_results["CAA"]["optimal_prompt_construction"],
1165
+ }
1166
+
1167
+ task_time = time.time() - task_start_time
1168
+ print(f"\n ā±ļø Task completed in {task_time:.1f}s (tested {configs_tested} configurations)")
1169
+
1170
+ except Exception as e:
1171
+ # NO FALLBACK - raise the error immediately
1172
+ print(f"\nāŒ Task '{task_name}' optimization failed:")
1173
+ print(f" Error: {e}")
1174
+ import traceback
1175
+
1176
+ traceback.print_exc()
1177
+ raise
1178
+
1179
+ # Save results
1180
+ print(f"\n{'=' * 80}")
1181
+ print("šŸ“Š COMPREHENSIVE OPTIMIZATION COMPLETE")
1182
+ print(f"{'=' * 80}\n")
1183
+
1184
+ results_file = f"./optimization_results/steering_comprehensive_{args.model.replace('/', '_')}.json"
1185
+ import os
1186
+
1187
+ os.makedirs(os.path.dirname(results_file), exist_ok=True)
1188
+
1189
+ output_data = {
1190
+ "model": args.model,
1191
+ "tasks": all_results,
1192
+ "methods_tested": args.methods,
1193
+ "limit": args.limit,
1194
+ "optimization_dimensions": ["layer", "strength", "strategy", "token_aggregation", "prompt_construction"],
1195
+ }
1196
+
1197
+ with open(results_file, "w") as f:
1198
+ json.dump(output_data, f, indent=2)
1199
+
1200
+ print(f"āœ… Results saved to: {results_file}\n")
1201
+
1202
+ # Print summary
1203
+ print("šŸ“‹ SUMMARY BY TASK:")
1204
+ print("-" * 140)
1205
+ for task_name, config in all_results.items():
1206
+ print(
1207
+ f" {task_name:20s} | L{config['best_layer']:2d} S{config['best_strength']:.1f} | {config['best_strategy']:12s} | T:{config['best_token_aggregation']:12s} | P:{config['best_prompt_construction']:18s}"
1208
+ )
1209
+ print("-" * 140 + "\n")
1210
+
1211
+ # Store results in optimization cache
1212
+ save_as_default = getattr(args, "save_as_default", False)
1213
+ print("šŸ’¾ Storing results in optimization cache...")
1214
+ for task_name, config in all_results.items():
1215
+ # Skip results that came from cache
1216
+ if config.get("from_cache"):
1217
+ continue
1218
+
1219
+ # Get best score from methods if available
1220
+ best_score = 0.0
1221
+ if "methods" in config and "CAA" in config["methods"]:
1222
+ best_score = config["methods"]["CAA"].get("accuracy", 0.0)
1223
+
1224
+ # Get best method name
1225
+ best_method_name = config.get("best_method", "CAA")
1226
+
1227
+ # Get method-specific parameters from the best config
1228
+ method_config = config.get("methods", {}).get(best_method_name, {})
1229
+
1230
+ cache_key = store_optimization(
1231
+ model=args.model,
1232
+ task=task_name,
1233
+ layer=config["best_layer"],
1234
+ strength=config["best_strength"],
1235
+ method=best_method_name,
1236
+ token_aggregation=config.get("best_token_aggregation", "last_token"),
1237
+ prompt_strategy=config.get("best_prompt_construction", "chat_template"),
1238
+ strategy=config.get("best_strategy", "constant"),
1239
+ score=best_score,
1240
+ metric="accuracy",
1241
+ metadata={"limit": args.limit},
1242
+ set_as_default=save_as_default,
1243
+ # PRISM parameters
1244
+ num_directions=method_config.get("num_directions", 1),
1245
+ direction_weighting=method_config.get("direction_weighting", "primary_only"),
1246
+ retain_weight=method_config.get("retain_weight", 0.0),
1247
+ independence_weight=method_config.get("independence_weight", 0.05),
1248
+ prism_optimization_steps=method_config.get("optimization_steps", 100),
1249
+ # PULSE parameters
1250
+ sensor_layer=method_config.get("sensor_layer", -1),
1251
+ steering_layers=method_config.get("steering_layers", ""),
1252
+ condition_threshold=method_config.get("condition_threshold", 0.5),
1253
+ gate_temperature=method_config.get("gate_temperature", 0.5),
1254
+ per_layer_scaling=method_config.get("per_layer_scaling", True),
1255
+ use_entropy_scaling=method_config.get("use_entropy_scaling", False),
1256
+ max_alpha=method_config.get("max_alpha", 2.0),
1257
+ # TITAN parameters
1258
+ gate_hidden_dim=method_config.get("gate_hidden_dim", 64),
1259
+ intensity_hidden_dim=method_config.get("intensity_hidden_dim", 32),
1260
+ behavior_weight=method_config.get("behavior_weight", 1.0),
1261
+ sparse_weight=method_config.get("sparse_weight", 0.05),
1262
+ titan_optimization_steps=method_config.get("titan_optimization_steps", 200),
1263
+ titan_learning_rate=method_config.get("titan_learning_rate", 0.005),
1264
+ # Store all method params as generic dict
1265
+ method_params=method_config,
1266
+ )
1267
+ print(f" āœ“ Cached {task_name}: {cache_key}")
1268
+
1269
+ if save_as_default:
1270
+ print(" āœ“ Results set as default configurations")
1271
+
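+ # Note (illustrative): cached entries can later be looked up per (model, task, method),
+ # as the execute_* helpers below do, e.g.:
+ #   cached = get_cached_optimization(args.model, task_name, "CAA")
+ #   if cached:
+ #       print(cached.layer, cached.strength, cached.score)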
1272
+ # Return results for programmatic access
1273
+ return {
1274
+ "model": args.model,
1275
+ "action": "comprehensive",
1276
+ "methods_tested": args.methods,
1277
+ "tasks_optimized": list(all_results.keys()),
1278
+ "results": all_results,
1279
+ "results_file": results_file,
1280
+ "optimization_dimensions": ["layer", "strength", "strategy", "token_aggregation", "prompt_construction"],
1281
+ }
1282
+
1283
+
1284
+ def get_strategy_weight(strategy: str, position: float) -> float:
1285
+ """
1286
+ Calculate steering weight based on strategy and token position.
1287
+
1288
+ Args:
1289
+ strategy: Steering strategy name
1290
+ position: Token position as fraction (0.0 = start, 1.0 = end)
1291
+
1292
+ Returns:
1293
+ Weight multiplier for steering vector
1294
+ """
1295
+ if strategy == "last_only":
1296
+ return 1.0 if position >= 0.9 else 0.0
1297
+ if strategy == "first_only":
1298
+ return 1.0 if position <= 0.1 else 0.0
1299
+ if strategy == "all_equal":
1300
+ return 1.0
1301
+ if strategy == "exponential_decay":
1302
+ return np.exp(-3.0 * position) # Decay rate of 3
1303
+ if strategy == "exponential_growth":
1304
+ return np.exp(3.0 * position)
1305
+ if strategy == "linear_decay":
1306
+ return 1.0 - position
1307
+ if strategy == "linear_growth":
1308
+ return position
1309
+ return 1.0 # Default to all_equal
1310
+
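+ # Worked examples for get_strategy_weight (informal, values rounded):
+ #   get_strategy_weight("exponential_decay", 0.0) -> 1.00
+ #   get_strategy_weight("exponential_decay", 0.5) -> ~0.22   (exp(-1.5))
+ #   get_strategy_weight("exponential_decay", 1.0) -> ~0.05   (exp(-3.0))
+ #   get_strategy_weight("last_only", 0.95)        -> 1.0  (0.0 for positions below 0.9)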
1311
+
1312
+ def execute_compare_methods(args, model, loader):
1313
+ """Execute method comparison - currently only CAA is implemented."""
1314
+ import matplotlib.pyplot as plt
1315
+ from wisent_plots import LineChart
1316
+
1317
+ from wisent.core.activations.activations_collector import ActivationCollector
1318
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
1319
+ from wisent.core.models.core.atoms import SteeringPlan, SteeringVector
1320
+ from wisent.core.cli.steering_method_trainer import create_steering_method
1321
+
1322
+ # Check for cached results if --use-cached is specified
1323
+ use_cached = getattr(args, "use_cached", False)
1324
+ save_as_default = getattr(args, "save_as_default", False)
1325
+
1326
+ if use_cached:
1327
+ print(f"\nšŸ“¦ Checking optimization cache for {args.task}...")
1328
+ for method in args.methods:
1329
+ cached = get_cached_optimization(args.model, args.task, method)
1330
+ if cached:
1331
+ print(
1332
+ f" āœ“ Found cached result for {method}: layer={cached.layer}, strength={cached.strength}, score={cached.score:.3f}"
1333
+ )
1334
+ return {
1335
+ "model": args.model,
1336
+ "action": "compare-methods",
1337
+ "task": args.task,
1338
+ "best_method": method,
1339
+ "best_layer": cached.layer,
1340
+ "best_strength": cached.strength,
1341
+ "best_score": cached.score,
1342
+ "from_cache": True,
1343
+ }
1344
+ print(" No cached results found. Running optimization...")
1345
+
1346
+ print(f"šŸ” Comparing steering methods for task: {args.task}\n")
1347
+ print(f" Methods: {', '.join(args.methods)}")
1348
+ print(f" Limit: {args.limit} samples")
1349
+ print(f" Layer: {args.layer}")
1350
+ print(f" Strength: {args.strength}\n")
1351
+
1352
+ # Load task data
1353
+ print("šŸ“Š Loading task data...")
1354
+ result = loader._load_one_task(
1355
+ task_name=args.task, split_ratio=0.8, seed=42, limit=args.limit, training_limit=None, testing_limit=None
1356
+ )
1357
+
1358
+ train_pairs = result["train_qa_pairs"]
1359
+ test_pairs = result["test_qa_pairs"]
1360
+ print(f" āœ“ Loaded {len(train_pairs.pairs)} train, {len(test_pairs.pairs)} test pairs\n")
1361
+
1362
+ # Initialize evaluator
1363
+ EvaluatorRotator.discover_evaluators("wisent.core.evaluators.benchmark_specific")
1364
+ evaluator = EvaluatorRotator(evaluator=None, task_name=args.task)
1365
+ print(f" āœ“ Using evaluator: {evaluator._plugin.name}\n")
1366
+
1367
+ # Collect activations once for all methods
1368
+ layer_str = str(args.layer)
1369
+ collector = ActivationCollector(model=model, store_device="cpu")
1370
+
1371
+ print("šŸŽÆ Collecting training activations (ONCE)...")
1372
+ pos_acts = []
1373
+ neg_acts = []
1374
+
1375
+ for i, pair in enumerate(train_pairs.pairs):
1376
+ if i % 10 == 0:
1377
+ print(f" Processing train pair {i + 1}/{len(train_pairs.pairs)}...", end="\r")
1378
+
1379
+ updated_pair = collector.collect_for_pair(
1380
+ pair,
1381
+ layers=[layer_str],
1382
+ aggregation=ActivationAggregationStrategy.MEAN_POOLING,
1383
+ return_full_sequence=False,
1384
+ normalize_layers=False,
1385
+ )
1386
+
1387
+ if (
1388
+ updated_pair.positive_response.layers_activations
1389
+ and layer_str in updated_pair.positive_response.layers_activations
1390
+ ):
1391
+ act = updated_pair.positive_response.layers_activations[layer_str]
1392
+ if act is not None:
1393
+ pos_acts.append(act)
1394
+
1395
+ if (
1396
+ updated_pair.negative_response.layers_activations
1397
+ and layer_str in updated_pair.negative_response.layers_activations
1398
+ ):
1399
+ act = updated_pair.negative_response.layers_activations[layer_str]
1400
+ if act is not None:
1401
+ neg_acts.append(act)
1402
+
1403
+ print(f" Processing train pair {len(train_pairs.pairs)}/{len(train_pairs.pairs)}... Done!")
1404
+ print(f" āœ“ Collected {len(pos_acts)} positive, {len(neg_acts)} negative activations\n")
1405
+
1406
+ # Test each method
1407
+ print("🧪 Testing methods...")
1408
+ method_results = {}
1409
+
1410
+ # Only CAA is implemented for now
1411
+ if "CAA" in args.methods:
1412
+ print("\n Testing CAA method...")
1413
+
1414
+ # Train steering vector using selected method
1415
+ method_name = args.methods[0] if args.methods else "CAA"
1416
+ steering_method = create_steering_method(method_name, args)
1417
+ steering_vector = steering_method.train_for_layer(pos_acts, neg_acts)
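+ # Descriptive note: for CAA the trained vector is typically the difference between the
+ # mean positive and mean negative activations at this layer; the SteeringVector `scale`
+ # below controls how strongly that direction is added during inference.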
1418
+
1419
+ # Create steering plan
1420
+ steering_vec = SteeringVector(vector=steering_vector, scale=args.strength)
1421
+ steering_plan = SteeringPlan(
1422
+ layers={layer_str: steering_vec},
1423
+ layers_description=[f"CAA steering layer={args.layer}, strength={args.strength}"],
1424
+ )
1425
+
1426
+ # Apply steering and evaluate
1427
+ model.apply_steering(steering_plan)
1428
+
1429
+ test_scores = []
1430
+ detailed_results = []
1431
+ for pair in test_pairs.pairs:
1432
+ choices = [pair.negative_response.model_response, pair.positive_response.model_response]
1433
+ expected = pair.positive_response.model_response
1434
+ test_code = pair.metadata.get("test_code") if pair.metadata else None
1435
+
1436
+ eval_result = evaluator.evaluate(
1437
+ response="",
1438
+ expected=expected,
1439
+ model=model,
1440
+ question=pair.prompt,
1441
+ choices=choices,
1442
+ steering_plan=steering_plan,
1443
+ test_code=test_code,
1444
+ task_name=args.task,
1445
+ )
1446
+
1447
+ is_correct = eval_result.ground_truth == "TRUTHFUL"
1448
+ test_scores.append(1.0 if is_correct else 0.0)
1449
+
1450
+ # Save full evaluation details
1451
+ detailed_results.append(
1452
+ {
1453
+ "question": pair.prompt,
1454
+ "choices": choices,
1455
+ "expected": expected,
1456
+ "ground_truth": eval_result.ground_truth,
1457
+ "method_used": eval_result.method_used,
1458
+ "confidence": eval_result.confidence,
1459
+ "details": eval_result.details,
1460
+ "meta": dict(eval_result.meta) if eval_result.meta else {},
1461
+ "is_correct": is_correct,
1462
+ }
1463
+ )
1464
+
1465
+ model.clear_steering()
1466
+
1467
+ caa_accuracy = np.mean(test_scores) if len(test_scores) > 0 else 0.0
1468
+ method_results["CAA"] = {
1469
+ "accuracy": caa_accuracy,
1470
+ "num_test_samples": len(test_scores),
1471
+ "detailed_results": detailed_results,
1472
+ }
1473
+
1474
+ print(f" āœ“ CAA: accuracy={caa_accuracy:.3f}")
1475
+
1476
+ # Other methods are not yet implemented
1477
+ for method in args.methods:
1478
+ if method not in ["CAA"]:
1479
+ print(f" āš ļø {method}: not yet implemented")
1480
+ method_results[method] = {"accuracy": 0.0, "status": "not_implemented"}
1481
+
1482
+ # Save results
1483
+ print(f"\n{'=' * 80}")
1484
+ print("šŸ“Š METHOD COMPARISON COMPLETE")
1485
+ print(f"{'=' * 80}\n")
1486
+
1487
+ results_file = f"./optimization_results/steering_compare_methods_{args.task}_{args.model.replace('/', '_')}.json"
1488
+ import os
1489
+
1490
+ os.makedirs(os.path.dirname(results_file), exist_ok=True)
1491
+
1492
+ output_data = {
1493
+ "model": args.model,
1494
+ "task": args.task,
1495
+ "layer": args.layer,
1496
+ "strength": args.strength,
1497
+ "methods": method_results,
1498
+ "limit": args.limit,
1499
+ }
1500
+
1501
+ with open(results_file, "w") as f:
1502
+ json.dump(output_data, f, indent=2)
1503
+
1504
+ print(f"āœ… Results saved to: {results_file}\n")
1505
+
1506
+ # Create comparison plot if we have results
1507
+ implemented_methods = [m for m in method_results if method_results[m].get("accuracy", 0) > 0]
1508
+ if len(implemented_methods) > 1 and args.save_plot:
1509
+ plot_path_svg = f"steering_compare_methods_{args.task}_{args.model.replace('/', '_')}.svg"
1510
+ plot_path_png = f"steering_compare_methods_{args.task}_{args.model.replace('/', '_')}.png"
1511
+
1512
+ method_names = list(implemented_methods)
1513
+ accuracies = [method_results[m]["accuracy"] for m in method_names]
1514
+
1515
+ chart = LineChart(style=1, figsize=(10, 6), show_markers=True)
1516
+ fig, ax = plt.subplots(1, 1, figsize=(10, 6))
1517
+
1518
+ ax.bar(method_names, accuracies, color="#3498db", alpha=0.8)
1519
+ ax.set_xlabel("Steering Method")
1520
+ ax.set_ylabel("Accuracy")
1521
+ ax.set_title(f"Steering Method Comparison\n{args.model} on {args.task}")
1522
+ ax.set_ylim(0, 1)
1523
+
1524
+ fig.savefig(plot_path_svg, format="svg", bbox_inches="tight")
1525
+ fig.savefig(plot_path_png, dpi=150, bbox_inches="tight")
1526
+ plt.close(fig)
1527
+
1528
+ print("šŸ’¾ Comparison plot saved to:")
1529
+ print(f" SVG: {plot_path_svg}")
1530
+ print(f" PNG: {plot_path_png}\n")
1531
+
1532
+ # Store best result in cache
1533
+ save_as_default = getattr(args, "save_as_default", False)
1534
+ best_method = max(method_results.keys(), key=lambda m: method_results[m].get("accuracy", 0))
1535
+ best_accuracy = method_results[best_method].get("accuracy", 0)
1536
+
1537
+ if best_accuracy > 0:
1538
+ print("šŸ’¾ Storing best result in optimization cache...")
1539
+ cache_key = store_optimization(
1540
+ model=args.model,
1541
+ task=args.task,
1542
+ layer=args.layer,
1543
+ strength=args.strength,
1544
+ method=best_method,
1545
+ strategy="constant",
1546
+ score=best_accuracy,
1547
+ metric="accuracy",
1548
+ metadata={"limit": args.limit},
1549
+ set_as_default=save_as_default,
1550
+ )
1551
+ print(f" āœ“ Cached: {cache_key}")
1552
+ if save_as_default:
1553
+ print(" āœ“ Set as default configuration")
1554
+
1555
+ return {"action": "compare-methods", "task": args.task, "methods": method_results, "results_file": results_file}
1556
+
1557
+
1558
+ def execute_optimize_layer(args, model, loader):
1559
+ """Execute layer optimization - find the best layer for steering."""
1560
+ import matplotlib.pyplot as plt
1561
+ from wisent_plots import LineChart
1562
+
1563
+ from wisent.core.activations.activations_collector import ActivationCollector
1564
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
1565
+ from wisent.core.models.core.atoms import SteeringPlan, SteeringVector
1566
+ from wisent.core.cli.steering_method_trainer import create_steering_method
1567
+
1568
+ # Check for cached results if --use-cached is specified
1569
+ use_cached = getattr(args, "use_cached", False)
1570
+ save_as_default = getattr(args, "save_as_default", False)
1571
+
1572
+ if use_cached:
1573
+ print(f"\nšŸ“¦ Checking optimization cache for {args.task}/{args.method}...")
1574
+ cached = get_cached_optimization(args.model, args.task, args.method)
1575
+ if cached:
1576
+ print(
1577
+ f" āœ“ Found cached result: layer={cached.layer}, strength={cached.strength}, score={cached.score:.3f}"
1578
+ )
1579
+ return {
1580
+ "model": args.model,
1581
+ "action": "optimize-layer",
1582
+ "task": args.task,
1583
+ "method": args.method,
1584
+ "best_layer": cached.layer,
1585
+ "best_strength": cached.strength,
1586
+ "best_accuracy": cached.score,
1587
+ "from_cache": True,
1588
+ }
1589
+ print(" No cached results found. Running optimization...")
1590
+
1591
+ print(f"šŸŽÆ Optimizing steering layer for task: {args.task}\n")
1592
+ print(f" Method: {args.method}")
1593
+ print(f" Strength: {args.strength}")
1594
+ print(f" Limit: {args.limit} samples\n")
1595
+
1596
+ # Load task data
1597
+ print("šŸ“Š Loading task data...")
1598
+ result = loader._load_one_task(
1599
+ task_name=args.task, split_ratio=0.8, seed=42, limit=args.limit, training_limit=None, testing_limit=None
1600
+ )
1601
+
1602
+ train_pairs = result["train_qa_pairs"]
1603
+ test_pairs = result["test_qa_pairs"]
1604
+ print(f" āœ“ Loaded {len(train_pairs.pairs)} train, {len(test_pairs.pairs)} test pairs\n")
1605
+
1606
+ # Initialize evaluator
1607
+ EvaluatorRotator.discover_evaluators("wisent.core.evaluators.benchmark_specific")
1608
+ evaluator = EvaluatorRotator(evaluator=None, task_name=args.task)
1609
+ print(f" āœ“ Using evaluator: {evaluator._plugin.name}\n")
1610
+
1611
+ # Determine layers to test
1612
+ if args.layers:
1613
+ layers_to_test = args.layers
1614
+ else:
1615
+ # Test all layers from 0 to num_layers-1
1616
+ layers_to_test = list(range(model.num_layers))
1617
+
1618
+ print(f"šŸ” Testing {len(layers_to_test)} layers: {layers_to_test[:5]}{'...' if len(layers_to_test) > 5 else ''}\n")
1619
+
1620
+ collector = ActivationCollector(model=model, store_device="cpu")
1621
+ layer_results = {}
1622
+ best_layer = None
1623
+ best_accuracy = 0.0
1624
+
1625
+ for layer_idx, layer in enumerate(layers_to_test, 1):
1626
+ layer_str = str(layer)
1627
+ print(f" [{layer_idx}/{len(layers_to_test)}] Testing layer {layer}...", end=" ")
1628
+
1629
+ try:
1630
+ # Collect activations for this layer
1631
+ pos_acts = []
1632
+ neg_acts = []
1633
+
1634
+ for pair in train_pairs.pairs:
1635
+ updated_pair = collector.collect_for_pair(
1636
+ pair,
1637
+ layers=[layer_str],
1638
+ aggregation=ActivationAggregationStrategy.MEAN_POOLING,
1639
+ return_full_sequence=False,
1640
+ normalize_layers=False,
1641
+ )
1642
+
1643
+ if (
1644
+ updated_pair.positive_response.layers_activations
1645
+ and layer_str in updated_pair.positive_response.layers_activations
1646
+ ):
1647
+ act = updated_pair.positive_response.layers_activations[layer_str]
1648
+ if act is not None:
1649
+ pos_acts.append(act)
1650
+
1651
+ if (
1652
+ updated_pair.negative_response.layers_activations
1653
+ and layer_str in updated_pair.negative_response.layers_activations
1654
+ ):
1655
+ act = updated_pair.negative_response.layers_activations[layer_str]
1656
+ if act is not None:
1657
+ neg_acts.append(act)
1658
+
1659
+ if len(pos_acts) == 0 or len(neg_acts) == 0:
1660
+ print("āš ļø No activations collected")
1661
+ continue
1662
+
1663
+ # Train steering vector using selected method
1664
+ steering_method = create_steering_method(args.method, args)
1665
+ steering_vector = steering_method.train_for_layer(pos_acts, neg_acts)
1666
1669
+
1670
+ # Create steering plan
1671
+ steering_vec = SteeringVector(vector=steering_vector, scale=args.strength)
1672
+ steering_plan = SteeringPlan(
1673
+ layers={layer_str: steering_vec}, layers_description=[f"{args.method} steering layer={layer}"]
1674
+ )
1675
+
1676
+ # Evaluate
1677
+ model.apply_steering(steering_plan)
1678
+
1679
+ test_scores = []
1680
+ detailed_results = []
1681
+ for pair in test_pairs.pairs:
1682
+ choices = [pair.negative_response.model_response, pair.positive_response.model_response]
1683
+ expected = pair.positive_response.model_response
1684
+ test_code = pair.metadata.get("test_code") if pair.metadata else None
1685
+
1686
+ eval_result = evaluator.evaluate(
1687
+ response="",
1688
+ expected=expected,
1689
+ model=model,
1690
+ question=pair.prompt,
1691
+ choices=choices,
1692
+ steering_plan=steering_plan,
1693
+ test_code=test_code,
1694
+ task_name=args.task,
1695
+ )
1696
+
1697
+ is_correct = eval_result.ground_truth == "TRUTHFUL"
1698
+ test_scores.append(1.0 if is_correct else 0.0)
1699
+
1700
+ # Save full evaluation details
1701
+ detailed_results.append(
1702
+ {
1703
+ "question": pair.prompt,
1704
+ "choices": choices,
1705
+ "expected": expected,
1706
+ "ground_truth": eval_result.ground_truth,
1707
+ "method_used": eval_result.method_used,
1708
+ "confidence": eval_result.confidence,
1709
+ "details": eval_result.details,
1710
+ "meta": dict(eval_result.meta) if eval_result.meta else {},
1711
+ "is_correct": is_correct,
1712
+ }
1713
+ )
1714
+
1715
+ model.clear_steering()
1716
+
1717
+ accuracy = np.mean(test_scores) if len(test_scores) > 0 else 0.0
1718
+ layer_results[layer] = {
1719
+ "accuracy": accuracy,
1720
+ "num_test_samples": len(test_scores),
1721
+ "detailed_results": detailed_results,
1722
+ }
1723
+
1724
+ print(f"accuracy={accuracy:.3f}")
1725
+
1726
+ if accuracy > best_accuracy:
1727
+ best_accuracy = accuracy
1728
+ best_layer = layer
1729
+
1730
+ except Exception as e:
1731
+ print(f"āŒ Error: {e}")
1732
+ if args.verbose:
1733
+ import traceback
1734
+
1735
+ traceback.print_exc()
1736
+
1737
+ # Results
1738
+ print(f"\n{'=' * 80}")
1739
+ print("šŸ“Š LAYER OPTIMIZATION COMPLETE")
1740
+ print(f"{'=' * 80}")
1741
+ print(f" Best layer: {best_layer}")
1742
+ print(f" Best accuracy: {best_accuracy:.4f}")
1743
+ print(f"{'=' * 80}\n")
1744
+
1745
+ # Save results
1746
+ results_file = f"./optimization_results/steering_optimize_layer_{args.task}_{args.model.replace('/', '_')}.json"
1747
+ import os
1748
+
1749
+ os.makedirs(os.path.dirname(results_file), exist_ok=True)
1750
+
1751
+ output_data = {
1752
+ "model": args.model,
1753
+ "task": args.task,
1754
+ "method": args.method,
1755
+ "strength": args.strength,
1756
+ "best_layer": best_layer,
1757
+ "best_accuracy": best_accuracy,
1758
+ "layer_results": {str(k): v for k, v in layer_results.items()},
1759
+ "limit": args.limit,
1760
+ }
1761
+
1762
+ with open(results_file, "w") as f:
1763
+ json.dump(output_data, f, indent=2)
1764
+
1765
+ print(f"āœ… Results saved to: {results_file}\n")
1766
+
1767
+ # Create plot
1768
+ if args.save_plot and len(layer_results) > 0:
1769
+ plot_path_svg = f"steering_optimize_layer_{args.task}_{args.model.replace('/', '_')}.svg"
1770
+ plot_path_png = f"steering_optimize_layer_{args.task}_{args.model.replace('/', '_')}.png"
1771
+
1772
+ layers = sorted(layer_results.keys())
1773
+ accuracies = [layer_results[l]["accuracy"] for l in layers]
1774
+
1775
+ chart = LineChart(style=1, figsize=(10, 6), show_markers=True)
1776
+ fig, ax = plt.subplots(1, 1, figsize=(10, 6))
1777
+
1778
+ chart.plot_multiple(
1779
+ x=layers,
1780
+ y_series=[accuracies],
1781
+ labels=["Accuracy"],
1782
+ title=f"Layer Optimization\n{args.model} on {args.task}",
1783
+ xlabel="Layer",
1784
+ ylabel="Accuracy",
1785
+ fig=fig,
1786
+ ax=ax,
1787
+ output_format="png",
1788
+ )
1789
+
1790
+ # Add vertical line for optimal layer
1791
+ ax.axvline(
1792
+ x=best_layer, color="#2ecc71", linestyle="--", linewidth=2, label=f"Best: Layer {best_layer}", alpha=0.7
1793
+ )
1794
+ ax.legend()
1795
+
1796
+ fig.savefig(plot_path_svg, format="svg", bbox_inches="tight")
1797
+ fig.savefig(plot_path_png, dpi=150, bbox_inches="tight")
1798
+ plt.close(fig)
1799
+
1800
+ print("šŸ’¾ Layer optimization plot saved to:")
1801
+ print(f" SVG: {plot_path_svg}")
1802
+ print(f" PNG: {plot_path_png}\n")
1803
+
1804
+ # Store result in cache
1805
+ save_as_default = getattr(args, "save_as_default", False)
1806
+ if best_layer is not None and best_accuracy > 0:
1807
+ print("šŸ’¾ Storing result in optimization cache...")
1808
+ cache_key = store_optimization(
1809
+ model=args.model,
1810
+ task=args.task,
1811
+ layer=best_layer,
1812
+ strength=args.strength,
1813
+ method=args.method,
1814
+ strategy="constant",
1815
+ score=best_accuracy,
1816
+ metric="accuracy",
1817
+ metadata={"limit": args.limit},
1818
+ set_as_default=save_as_default,
1819
+ )
1820
+ print(f" āœ“ Cached: {cache_key}")
1821
+ if save_as_default:
1822
+ print(" āœ“ Set as default configuration")
1823
+
1824
+ return {
1825
+ "action": "optimize-layer",
1826
+ "task": args.task,
1827
+ "method": args.method,
1828
+ "best_layer": best_layer,
1829
+ "best_accuracy": best_accuracy,
1830
+ "results_file": results_file,
1831
+ }
1832
+
1833
+
1834
+ def execute_optimize_strength(args, model, loader):
1835
+ """Execute strength optimization - find the best steering strength."""
1836
+ import matplotlib.pyplot as plt
1837
+ from wisent_plots import LineChart
1838
+
1839
+ from wisent.core.activations.activations_collector import ActivationCollector
1840
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
1841
+ from wisent.core.models.core.atoms import SteeringPlan, SteeringVector
1842
+ from wisent.core.cli.steering_method_trainer import create_steering_method
1843
+
1844
+ # Check for cached results if --use-cached is specified
1845
+ use_cached = getattr(args, "use_cached", False)
1846
+ save_as_default = getattr(args, "save_as_default", False)
1847
+
1848
+ if use_cached:
1849
+ print(f"\nšŸ“¦ Checking optimization cache for {args.task}/{args.method}...")
1850
+ cached = get_cached_optimization(args.model, args.task, args.method)
1851
+ if cached:
1852
+ print(
1853
+ f" āœ“ Found cached result: layer={cached.layer}, strength={cached.strength}, score={cached.score:.3f}"
1854
+ )
1855
+ return {
1856
+ "model": args.model,
1857
+ "action": "optimize-strength",
1858
+ "task": args.task,
1859
+ "method": args.method,
1860
+ "best_layer": cached.layer,
1861
+ "best_strength": cached.strength,
1862
+ "best_accuracy": cached.score,
1863
+ "from_cache": True,
1864
+ }
1865
+ print(" No cached results found. Running optimization...")
1866
+
1867
+ print(f"šŸ’Ŗ Optimizing steering strength for task: {args.task}\n")
1868
+ print(f" Method: {args.method}")
1869
+ print(f" Layer: {args.layer}")
1870
+ print(f" Strength range: {args.strength_range[0]} to {args.strength_range[1]}")
1871
+ print(f" Num steps: {args.num_strength_steps}")
1872
+ print(f" Limit: {args.limit} samples\n")
1873
+
1874
+ # Load task data
1875
+ print("šŸ“Š Loading task data...")
1876
+ result = loader._load_one_task(
1877
+ task_name=args.task, split_ratio=0.8, seed=42, limit=args.limit, training_limit=None, testing_limit=None
1878
+ )
1879
+
1880
+ train_pairs = result["train_qa_pairs"]
1881
+ test_pairs = result["test_qa_pairs"]
1882
+ print(f" āœ“ Loaded {len(train_pairs.pairs)} train, {len(test_pairs.pairs)} test pairs\n")
1883
+
1884
+ # Initialize evaluator
1885
+ EvaluatorRotator.discover_evaluators("wisent.core.evaluators.benchmark_specific")
1886
+ evaluator = EvaluatorRotator(evaluator=None, task_name=args.task)
1887
+ print(f" āœ“ Using evaluator: {evaluator._plugin.name}\n")
1888
+
1889
+ # Collect activations ONCE
1890
+ layer_str = str(args.layer)
1891
+ collector = ActivationCollector(model=model, store_device="cpu")
1892
+
1893
+ print("šŸŽÆ Collecting training activations (ONCE)...")
1894
+ pos_acts = []
1895
+ neg_acts = []
1896
+
1897
+ for i, pair in enumerate(train_pairs.pairs):
1898
+ if i % 10 == 0:
1899
+ print(f" Processing train pair {i + 1}/{len(train_pairs.pairs)}...", end="\r")
1900
+
1901
+ updated_pair = collector.collect_for_pair(
1902
+ pair,
1903
+ layers=[layer_str],
1904
+ aggregation=ActivationAggregationStrategy.MEAN_POOLING,
1905
+ return_full_sequence=False,
1906
+ normalize_layers=False,
1907
+ )
1908
+
1909
+ if (
1910
+ updated_pair.positive_response.layers_activations
1911
+ and layer_str in updated_pair.positive_response.layers_activations
1912
+ ):
1913
+ act = updated_pair.positive_response.layers_activations[layer_str]
1914
+ if act is not None:
1915
+ pos_acts.append(act)
1916
+
1917
+ if (
1918
+ updated_pair.negative_response.layers_activations
1919
+ and layer_str in updated_pair.negative_response.layers_activations
1920
+ ):
1921
+ act = updated_pair.negative_response.layers_activations[layer_str]
1922
+ if act is not None:
1923
+ neg_acts.append(act)
1924
+
1925
+ print(f" Processing train pair {len(train_pairs.pairs)}/{len(train_pairs.pairs)}... Done!")
1926
+ print(f" āœ“ Collected {len(pos_acts)} positive, {len(neg_acts)} negative activations\n")
1927
+
1928
+ # Train steering vector using selected method
1929
+ steering_method = create_steering_method(args.method, args)
1930
+ steering_vector = steering_method.train_for_layer(pos_acts, neg_acts)
1931
1939
+
1940
+ # Generate strength values to test
1941
+ min_strength, max_strength = args.strength_range
1942
+ strengths_to_test = np.linspace(min_strength, max_strength, args.num_strength_steps)
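+ # Illustrative example: strength_range=(0.0, 2.0) with num_strength_steps=5
+ # tests 0.0, 0.5, 1.0, 1.5, 2.0 (hypothetical values).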
1943
+
1944
+ print(
1945
+ f"šŸ” Testing {len(strengths_to_test)} strength values: {strengths_to_test[0]:.2f} to {strengths_to_test[-1]:.2f}\n"
1946
+ )
1947
+
1948
+ strength_results = {}
1949
+ best_strength = None
1950
+ best_accuracy = 0.0
1951
+
1952
+ for strength_idx, strength in enumerate(strengths_to_test, 1):
1953
+ print(f" [{strength_idx}/{len(strengths_to_test)}] Testing strength {strength:.2f}...", end=" ")
1954
+
1955
+ try:
1956
+ # Create steering plan with this strength
1957
+ steering_vec = SteeringVector(vector=steering_vector, scale=float(strength))
1958
+ steering_plan = SteeringPlan(
1959
+ layers={layer_str: steering_vec}, layers_description=[f"{args.method} steering strength={strength:.2f}"]
1960
+ )
1961
+
1962
+ # Evaluate
1963
+ model.apply_steering(steering_plan)
1964
+
1965
+ test_scores = []
1966
+ detailed_results = []
1967
+ for pair in test_pairs.pairs:
1968
+ choices = [pair.negative_response.model_response, pair.positive_response.model_response]
1969
+ expected = pair.positive_response.model_response
1970
+ test_code = pair.metadata.get("test_code") if pair.metadata else None
1971
+
1972
+ eval_result = evaluator.evaluate(
1973
+ response="",
1974
+ expected=expected,
1975
+ model=model,
1976
+ question=pair.prompt,
1977
+ choices=choices,
1978
+ steering_plan=steering_plan,
1979
+ test_code=test_code,
1980
+ task_name=args.task,
1981
+ )
1982
+
1983
+ is_correct = eval_result.ground_truth == "TRUTHFUL"
1984
+ test_scores.append(1.0 if is_correct else 0.0)
1985
+
1986
+ # Save full evaluation details
1987
+ detailed_results.append(
1988
+ {
1989
+ "question": pair.prompt,
1990
+ "choices": choices,
1991
+ "expected": expected,
1992
+ "ground_truth": eval_result.ground_truth,
1993
+ "method_used": eval_result.method_used,
1994
+ "confidence": eval_result.confidence,
1995
+ "details": eval_result.details,
1996
+ "meta": dict(eval_result.meta) if eval_result.meta else {},
1997
+ "is_correct": is_correct,
1998
+ }
1999
+ )
2000
+
2001
+ model.clear_steering()
2002
+
2003
+ accuracy = np.mean(test_scores) if len(test_scores) > 0 else 0.0
2004
+ strength_results[float(strength)] = {
2005
+ "accuracy": accuracy,
2006
+ "num_test_samples": len(test_scores),
2007
+ "detailed_results": detailed_results,
2008
+ }
2009
+
2010
+ print(f"accuracy={accuracy:.3f}")
2011
+
2012
+ if accuracy > best_accuracy:
2013
+ best_accuracy = accuracy
2014
+ best_strength = float(strength)
2015
+
2016
+ except Exception as e:
2017
+ print(f"āŒ Error: {e}")
2018
+ if args.verbose:
2019
+ import traceback
2020
+
2021
+ traceback.print_exc()
2022
+
2023
+ # Results
2024
+ print(f"\n{'=' * 80}")
2025
+ print("šŸ“Š STRENGTH OPTIMIZATION COMPLETE")
2026
+ print(f"{'=' * 80}")
2027
+ print(f" Best strength: {best_strength:.2f}")
2028
+ print(f" Best accuracy: {best_accuracy:.4f}")
2029
+ print(f"{'=' * 80}\n")
2030
+
2031
+ # Save results
2032
+ results_file = f"./optimization_results/steering_optimize_strength_{args.task}_{args.model.replace('/', '_')}.json"
2033
+ import os
2034
+
2035
+ os.makedirs(os.path.dirname(results_file), exist_ok=True)
2036
+
2037
+ output_data = {
2038
+ "model": args.model,
2039
+ "task": args.task,
2040
+ "method": args.method,
2041
+ "layer": args.layer,
2042
+ "best_strength": best_strength,
2043
+ "best_accuracy": best_accuracy,
2044
+ "strength_results": {str(k): v for k, v in strength_results.items()},
2045
+ "limit": args.limit,
2046
+ }
2047
+
2048
+ with open(results_file, "w") as f:
2049
+ json.dump(output_data, f, indent=2)
2050
+
2051
+ print(f"āœ… Results saved to: {results_file}\n")
2052
+
2053
+ # Create plot
2054
+ if args.save_plot and len(strength_results) > 0:
2055
+ plot_path_svg = f"steering_optimize_strength_{args.task}_{args.model.replace('/', '_')}.svg"
2056
+ plot_path_png = f"steering_optimize_strength_{args.task}_{args.model.replace('/', '_')}.png"
2057
+
2058
+ strengths = sorted(strength_results.keys())
2059
+ accuracies = [strength_results[s]["accuracy"] for s in strengths]
2060
+
2061
+ chart = LineChart(style=1, figsize=(10, 6), show_markers=True)
2062
+ fig, ax = plt.subplots(1, 1, figsize=(10, 6))
2063
+
2064
+ chart.plot_multiple(
2065
+ x=strengths,
2066
+ y_series=[accuracies],
2067
+ labels=["Accuracy"],
2068
+ title=f"Strength Optimization\n{args.model} on {args.task}",
2069
+ xlabel="Steering Strength",
2070
+ ylabel="Accuracy",
2071
+ fig=fig,
2072
+ ax=ax,
2073
+ output_format="png",
2074
+ )
2075
+
2076
+ # Add vertical line for optimal strength
2077
+ ax.axvline(
2078
+ x=best_strength, color="#2ecc71", linestyle="--", linewidth=2, label=f"Best: {best_strength:.2f}", alpha=0.7
2079
+ )
2080
+ ax.legend()
2081
+
2082
+ fig.savefig(plot_path_svg, format="svg", bbox_inches="tight")
2083
+ fig.savefig(plot_path_png, dpi=150, bbox_inches="tight")
2084
+ plt.close(fig)
2085
+
2086
+ print("šŸ’¾ Strength optimization plot saved to:")
2087
+ print(f" SVG: {plot_path_svg}")
2088
+ print(f" PNG: {plot_path_png}\n")
2089
+
2090
+ # Store result in cache
2091
+ save_as_default = getattr(args, "save_as_default", False)
2092
+ if best_strength is not None and best_accuracy > 0:
2093
+ print("šŸ’¾ Storing result in optimization cache...")
2094
+ cache_key = store_optimization(
2095
+ model=args.model,
2096
+ task=args.task,
2097
+ layer=args.layer,
2098
+ strength=best_strength,
2099
+ method=args.method,
2100
+ strategy="constant",
2101
+ score=best_accuracy,
2102
+ metric="accuracy",
2103
+ metadata={"limit": args.limit, "strength_range": args.strength_range},
2104
+ set_as_default=save_as_default,
2105
+ )
2106
+ print(f" āœ“ Cached: {cache_key}")
2107
+ if save_as_default:
2108
+ print(" āœ“ Set as default configuration")
2109
+
2110
+ return {
2111
+ "action": "optimize-strength",
2112
+ "task": args.task,
2113
+ "method": args.method,
2114
+ "best_strength": best_strength,
2115
+ "best_accuracy": best_accuracy,
2116
+ "results_file": results_file,
2117
+ }
2118
+
2119
+
2120
+ def execute_auto(args, model, loader):
2121
+ """Execute automatic optimization - optimizes layer AND strength together."""
2122
+ import matplotlib.pyplot as plt
2123
+
2124
+ from wisent.core.activations.activations_collector import ActivationCollector
2125
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
2126
+ from wisent.core.models.core.atoms import SteeringPlan, SteeringVector
2127
+ from wisent.core.cli.steering_method_trainer import create_steering_method
2128
+
2129
+ # Check for cached results if --use-cached is specified
2130
+ use_cached = getattr(args, "use_cached", False)
2131
+ save_as_default = getattr(args, "save_as_default", False)
2132
+ task_name = args.task or "default"
2133
+
2134
+ if use_cached:
2135
+ print(f"\nšŸ“¦ Checking optimization cache for {task_name}...")
2136
+ for method in args.methods:
2137
+ cached = get_cached_optimization(args.model, task_name, method)
2138
+ if cached:
2139
+ print(
2140
+ f" āœ“ Found cached result for {method}: layer={cached.layer}, strength={cached.strength}, score={cached.score:.3f}"
2141
+ )
2142
+ return {
2143
+ "model": args.model,
2144
+ "action": "auto",
2145
+ "task": task_name,
2146
+ "best_method": method,
2147
+ "best_layer": cached.layer,
2148
+ "best_strength": cached.strength,
2149
+ "best_accuracy": cached.score,
2150
+ "from_cache": True,
2151
+ }
2152
+ print(" No cached results found. Running optimization...")
2153
+
2154
+ print("šŸ¤– Running automatic steering optimization...\n")
2155
+ print(f" Task: {args.task}")
2156
+ print(f" Methods: {', '.join(args.methods)}")
2157
+ print(f" Strength range: {args.strength_range}")
2158
+ print(f" Limit: {args.limit} samples\n")
2159
+
2160
+ # Load task data
2161
+ print("šŸ“Š Loading task data...")
2162
+ result = loader._load_one_task(
2163
+ task_name=args.task, split_ratio=0.8, seed=42, limit=args.limit, training_limit=None, testing_limit=None
2164
+ )
2165
+
2166
+ train_pairs = result["train_qa_pairs"]
2167
+ test_pairs = result["test_qa_pairs"]
2168
+ print(f" āœ“ Loaded {len(train_pairs.pairs)} train, {len(test_pairs.pairs)} test pairs\n")
2169
+
2170
+ # Initialize evaluator
2171
+ EvaluatorRotator.discover_evaluators("wisent.core.evaluators.benchmark_specific")
2172
+ evaluator = EvaluatorRotator(evaluator=None, task_name=args.task)
2173
+ print(f" āœ“ Using evaluator: {evaluator._plugin.name}\n")
2174
+
2175
+ # Define search space
2176
+ layers_to_test = list(
2177
+ range(max(0, model.num_layers // 2 - 2), min(model.num_layers, model.num_layers // 2 + 3))
2178
+ ) # Test 5 layers around middle
2179
+ min_strength, max_strength = args.strength_range
2180
+ strengths_to_test = np.linspace(min_strength, max_strength, 5) # 5 strength values
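+ # Worked example (illustrative): for a 32-layer model, layers_to_test covers layers 14-18
+ # (num_layers // 2 = 16, so range(14, 19)), giving 5 layers x 5 strengths = 25 configurations.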
2181
+
2182
+ print("šŸ” Auto-optimizing layer and strength...")
2183
+ print(f" Testing {len(layers_to_test)} layers: {layers_to_test}")
2184
+ print(f" Testing {len(strengths_to_test)} strengths: {strengths_to_test[0]:.2f} to {strengths_to_test[-1]:.2f}")
2185
+ print(f" Total configurations: {len(layers_to_test) * len(strengths_to_test)}\n")
2186
+
2187
+ collector = ActivationCollector(model=model, store_device="cpu")
2188
+ all_results = {}
2189
+ best_config = None
2190
+ best_accuracy = 0.0
2191
+
2192
+ config_count = 0
2193
+ total_configs = len(layers_to_test) * len(strengths_to_test)
2194
+
2195
+ for layer in layers_to_test:
2196
+ layer_str = str(layer)
2197
+
2198
+ # Collect activations for this layer
2199
+ print(f" Collecting activations for layer {layer}...")
2200
+ pos_acts = []
2201
+ neg_acts = []
2202
+
2203
+ for pair in train_pairs.pairs:
2204
+ updated_pair = collector.collect_for_pair(
2205
+ pair,
2206
+ layers=[layer_str],
2207
+ aggregation=ActivationAggregationStrategy.MEAN_POOLING,
2208
+ return_full_sequence=False,
2209
+ normalize_layers=False,
2210
+ )
2211
+
2212
+ if (
2213
+ updated_pair.positive_response.layers_activations
2214
+ and layer_str in updated_pair.positive_response.layers_activations
2215
+ ):
2216
+ act = updated_pair.positive_response.layers_activations[layer_str]
2217
+ if act is not None:
2218
+ pos_acts.append(act)
2219
+
2220
+ if (
2221
+ updated_pair.negative_response.layers_activations
2222
+ and layer_str in updated_pair.negative_response.layers_activations
2223
+ ):
2224
+ act = updated_pair.negative_response.layers_activations[layer_str]
2225
+ if act is not None:
2226
+ neg_acts.append(act)
2227
+
2228
+ if len(pos_acts) == 0 or len(neg_acts) == 0:
2229
+ print(f" āš ļø No activations collected for layer {layer}")
2230
+ continue
2231
+
2232
+ # Train steering vector for this layer using selected method
2233
+ method_name = args.methods[0] if args.methods else "CAA"
2234
+ steering_method = create_steering_method(method_name, args)
2235
+ steering_vector = steering_method.train_for_layer(pos_acts, neg_acts)
2236
2239
+
2240
+ # Test different strengths for this layer
2241
+ for strength in strengths_to_test:
2242
+ config_count += 1
2243
+ print(f" [{config_count}/{total_configs}] Layer {layer}, Strength {strength:.2f}...", end=" ")
2244
+
2245
+ try:
2246
+ # Create steering plan
2247
+ steering_vec = SteeringVector(vector=steering_vector, scale=float(strength))
2248
+ steering_plan = SteeringPlan(
2249
+ layers={layer_str: steering_vec}, layers_description=[f"CAA layer={layer}, strength={strength:.2f}"]
2250
+ )
2251
+
2252
+ # Evaluate
2253
+ model.apply_steering(steering_plan)
2254
+
2255
+ test_scores = []
2256
+ detailed_results = []
2257
+ for pair in test_pairs.pairs:
2258
+ choices = [pair.negative_response.model_response, pair.positive_response.model_response]
2259
+ expected = pair.positive_response.model_response
2260
+ test_code = pair.metadata.get("test_code") if pair.metadata else None
2261
+
2262
+ eval_result = evaluator.evaluate(
2263
+ response="",
2264
+ expected=expected,
2265
+ model=model,
2266
+ question=pair.prompt,
2267
+ choices=choices,
2268
+ steering_plan=steering_plan,
2269
+ test_code=test_code,
2270
+ task_name=task_name,
2271
+ )
2272
+
2273
+ is_correct = eval_result.ground_truth == "TRUTHFUL"
2274
+ test_scores.append(1.0 if is_correct else 0.0)
2275
+
2276
+ # Save full evaluation details
2277
+ detailed_results.append(
2278
+ {
2279
+ "question": pair.prompt,
2280
+ "choices": choices,
2281
+ "expected": expected,
2282
+ "ground_truth": eval_result.ground_truth,
2283
+ "method_used": eval_result.method_used,
2284
+ "confidence": eval_result.confidence,
2285
+ "details": eval_result.details,
2286
+ "meta": dict(eval_result.meta) if eval_result.meta else {},
2287
+ "is_correct": is_correct,
2288
+ }
2289
+ )
2290
+
2291
+ model.clear_steering()
2292
+
2293
+ accuracy = np.mean(test_scores) if len(test_scores) > 0 else 0.0
2294
+ all_results[(layer, float(strength))] = {
2295
+ "accuracy": accuracy,
2296
+ "num_test_samples": len(test_scores),
2297
+ "detailed_results": detailed_results,
2298
+ }
2299
+
2300
+ print(f"accuracy={accuracy:.3f}")
2301
+
2302
+ if accuracy > best_accuracy:
2303
+ best_accuracy = accuracy
2304
+ best_config = {"layer": layer, "strength": float(strength), "accuracy": accuracy}
2305
+
2306
+ except Exception as e:
2307
+ print(f"āŒ Error: {e}")
2308
+ if args.verbose:
2309
+ import traceback
2310
+
2311
+ traceback.print_exc()
2312
+
2313
+ # Results
2314
+ print(f"\n{'=' * 80}")
2315
+ print("šŸ“Š AUTO OPTIMIZATION COMPLETE")
2316
+ print(f"{'=' * 80}")
2317
+ if best_config:
2318
+ print(f" Best layer: {best_config['layer']}")
2319
+ print(f" Best strength: {best_config['strength']:.2f}")
2320
+ print(f" Best accuracy: {best_config['accuracy']:.4f}")
2321
+ else:
2322
+ print(" āš ļø No valid configuration found")
2323
+ print(f"{'=' * 80}\n")
2324
+
2325
+ # Save results
2326
+ results_file = f"./optimization_results/steering_auto_{args.task}_{args.model.replace('/', '_')}.json"
2327
+ import os
2328
+
2329
+ os.makedirs(os.path.dirname(results_file), exist_ok=True)
2330
+
2331
+ output_data = {
2332
+ "model": args.model,
2333
+ "task": args.task,
2334
+ "methods": args.methods,
2335
+ "best_config": best_config,
2336
+ "all_results": {f"layer{k[0]}_strength{k[1]:.2f}": v for k, v in all_results.items()},
2337
+ "limit": args.limit,
2338
+ }
2339
+
2340
+ with open(results_file, "w") as f:
2341
+ json.dump(output_data, f, indent=2)
2342
+
2343
+ print(f"āœ… Results saved to: {results_file}\n")
2344
+
2345
+ # Create heatmap plot
2346
+ if args.save_plot and len(all_results) > 0 and best_config:
2347
+ plot_path_svg = f"steering_auto_{args.task}_{args.model.replace('/', '_')}.svg"
2348
+ plot_path_png = f"steering_auto_{args.task}_{args.model.replace('/', '_')}.png"
2349
+
2350
+ # Prepare data for heatmap
2351
+ layers = sorted(set(k[0] for k in all_results))
2352
+ strengths = sorted(set(k[1] for k in all_results))
2353
+
2354
+ # Create accuracy matrix
2355
+ accuracy_matrix = np.zeros((len(strengths), len(layers)))
2356
+ for i, strength in enumerate(strengths):
2357
+ for j, layer in enumerate(layers):
2358
+ if (layer, strength) in all_results:
2359
+ accuracy_matrix[i, j] = all_results[(layer, strength)]["accuracy"]
2360
+
2361
+ fig, ax = plt.subplots(1, 1, figsize=(10, 8))
2362
+
2363
+ im = ax.imshow(accuracy_matrix, cmap="viridis", aspect="auto")
2364
+
2365
+ # Set ticks and labels
2366
+ ax.set_xticks(np.arange(len(layers)))
2367
+ ax.set_yticks(np.arange(len(strengths)))
2368
+ ax.set_xticklabels(layers)
2369
+ ax.set_yticklabels([f"{s:.2f}" for s in strengths])
2370
+
2371
+ # Labels
2372
+ ax.set_xlabel("Layer")
2373
+ ax.set_ylabel("Strength")
2374
+ ax.set_title(f"Auto Optimization Heatmap\n{args.model} on {args.task}")
2375
+
2376
+ # Colorbar
2377
+ cbar = plt.colorbar(im, ax=ax)
2378
+ cbar.set_label("Accuracy", rotation=270, labelpad=15)
2379
+
2380
+ # Mark best configuration
2381
+ best_layer_idx = layers.index(best_config["layer"])
2382
+ best_strength_idx = strengths.index(best_config["strength"])
2383
+ ax.plot(
2384
+ best_layer_idx,
2385
+ best_strength_idx,
2386
+ "r*",
2387
+ markersize=20,
2388
+ label=f"Best: L{best_config['layer']}, S{best_config['strength']:.2f}",
2389
+ )
2390
+ ax.legend()
2391
+
2392
+ fig.savefig(plot_path_svg, format="svg", bbox_inches="tight")
2393
+ fig.savefig(plot_path_png, dpi=150, bbox_inches="tight")
2394
+ plt.close(fig)
2395
+
2396
+ print("šŸ’¾ Auto optimization heatmap saved to:")
2397
+ print(f" SVG: {plot_path_svg}")
2398
+ print(f" PNG: {plot_path_png}\n")
2399
+
2400
+ # Store result in cache
2401
+ save_as_default = getattr(args, "save_as_default", False)
2402
+ if best_config and best_config.get("accuracy", 0) > 0:
2403
+ print("šŸ’¾ Storing result in optimization cache...")
2404
+ cache_key = store_optimization(
2405
+ model=args.model,
2406
+ task=args.task or "auto",
2407
+ layer=best_config["layer"],
2408
+ strength=best_config["strength"],
2409
+ method=best_config.get("method", "CAA"),
2410
+ strategy=best_config.get("strategy", "constant"),
2411
+ score=best_config["accuracy"],
2412
+ metric="accuracy",
2413
+ metadata={"limit": args.limit, "strength_range": list(args.strength_range)},
2414
+ set_as_default=save_as_default,
2415
+ )
2416
+ print(f" āœ“ Cached: {cache_key}")
2417
+ if save_as_default:
2418
+ print(" āœ“ Set as default configuration")
2419
+
2420
+ return {
2421
+ "action": "auto",
2422
+ "task": args.task,
2423
+ "methods": args.methods,
2424
+ "best_config": best_config,
2425
+ "results_file": results_file,
2426
+ }
2427
+
2428
+
2429
+ def execute_personalization(args, model):
2430
+ """
2431
+ Execute personalization optimization - find optimal parameters for trait steering.
2432
+
2433
+ This optimizes ALL steering parameters for personality/trait vectors by:
2434
+ 1. Generating synthetic contrastive pairs for the trait
2435
+ 2. Testing all combinations of:
2436
+ - Layers (where to apply steering)
2437
+ - Strengths (how strong the steering signal is)
2438
+ - Token aggregation strategies (LAST_TOKEN, MEAN_POOLING, FIRST_TOKEN, MAX_POOLING)
2439
+ - Prompt construction strategies (CHAT_TEMPLATE, DIRECT_COMPLETION, INSTRUCTION_FOLLOWING, ROLE_PLAYING, MULTIPLE_CHOICE)
+ - Steering application strategies (constant, initial_only, diminishing, all_equal)
2440
+ 3. Evaluating each configuration using personalization metrics:
2441
+ - Difference: Is the steered response different from baseline?
2442
+ - Quality: Is the response coherent (not lobotomized)?
2443
+ - Alignment: Does the response match the intended trait?
2444
+ 4. Selecting the configuration with the highest overall score
2445
+ """
2446
+ import os
2447
+
2448
+ import torch
2449
+
2450
+ from wisent.core.activations.activations_collector import ActivationCollector
2451
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
2452
+ from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
2453
+ from wisent.core.evaluators.steering_evaluators import PersonalizationEvaluator
2454
+ from wisent.core.models.core.atoms import SteeringPlan, SteeringVector
2455
+ from wisent.core.cli.steering_method_trainer import create_steering_method
2456
+ from wisent.core.synthetic.cleaners.pairs_cleaner import PairsCleaner
2457
+ from wisent.core.synthetic.db_instructions.mini_dp import Default_DB_Instructions
2458
+ from wisent.core.synthetic.generators.diversities.methods.fast_diversity import FastDiversity
2459
+ from wisent.core.synthetic.generators.pairs_generator import SyntheticContrastivePairsGenerator
2460
+
2461
+ trait = args.trait
2462
+ trait_name = args.trait_name or trait.split()[0].lower()
2463
+
2464
+ print(f"\n{'=' * 80}", flush=True)
2465
+ print("šŸŽ­ PERSONALIZATION OPTIMIZATION (COMPREHENSIVE)", flush=True)
2466
+ print(f"{'=' * 80}", flush=True)
2467
+ print(f" Trait: {trait}", flush=True)
2468
+ print(f" Trait Name: {trait_name}", flush=True)
2469
+ print(f" Model: {args.model}", flush=True)
2470
+ print(f" Num Pairs: {args.num_pairs}", flush=True)
2471
+ print(f" Num Test Prompts: {args.num_test_prompts}", flush=True)
2472
+ print(f" Output Directory: {args.output_dir}", flush=True)
2473
+ print(f"{'=' * 80}\n", flush=True)
2474
+
2475
+ # Create output directory
2476
+ os.makedirs(args.output_dir, exist_ok=True)
2477
+ os.makedirs(os.path.join(args.output_dir, "vectors"), exist_ok=True)
2478
+
2479
+ # Determine layers to test - ALL layers by default
2480
+ if args.layers:
2481
+ layers_to_test = args.layers
2482
+ else:
2483
+ # Test ALL layers (1-indexed, since activation collector uses 1-based indexing)
2484
+ num_layers = model.num_layers
2485
+ layers_to_test = list(range(1, num_layers + 1))
2486
+
2487
+ # Determine strengths to test
2488
+ min_strength, max_strength = args.strength_range
2489
+ strengths_to_test = np.linspace(min_strength, max_strength, args.num_strength_steps)
2490
+
2491
+ # Token aggregation strategies to test - ALL strategies
2492
+ token_aggregations_to_test = [
2493
+ ActivationAggregationStrategy.LAST_TOKEN,
2494
+ ActivationAggregationStrategy.MEAN_POOLING,
2495
+ ActivationAggregationStrategy.FIRST_TOKEN,
2496
+ ActivationAggregationStrategy.MAX_POOLING,
2497
+ ]
2498
+
2499
+ # Prompt construction strategies to test - ALL strategies
2500
+ prompt_constructions_to_test = [
2501
+ PromptConstructionStrategy.CHAT_TEMPLATE,
2502
+ PromptConstructionStrategy.DIRECT_COMPLETION,
2503
+ PromptConstructionStrategy.INSTRUCTION_FOLLOWING,
2504
+ PromptConstructionStrategy.ROLE_PLAYING,
2505
+ PromptConstructionStrategy.MULTIPLE_CHOICE,
2506
+ ]
2507
+
2508
+ # Steering application strategies to test - ALL strategies
2509
+ steering_strategies_to_test = ["constant", "initial_only", "diminishing", "all_equal"]
2510
+
2511
+ total_configs = (
2512
+ len(layers_to_test)
2513
+ * len(strengths_to_test)
2514
+ * len(steering_strategies_to_test)
2515
+ * len(token_aggregations_to_test)
2516
+ * len(prompt_constructions_to_test)
2517
+ )
2518
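+ # e.g. a 32-layer model with 5 strengths, 4 steering strategies, 4 aggregations and 5 prompt constructions: 32 * 5 * 4 * 4 * 5 = 12,800 configurations (illustrative)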
+
2519
+ print("šŸ“Š Search Space:", flush=True)
2520
+ print(f" Layers: {layers_to_test} ({len(layers_to_test)} total)", flush=True)
2521
+ print(f" Strengths: {[f'{s:.2f}' for s in strengths_to_test]}", flush=True)
2522
+ print(f" Steering Strategies: {steering_strategies_to_test}", flush=True)
2523
+ print(f" Token Aggregations: {[t.value for t in token_aggregations_to_test]}", flush=True)
2524
+ print(f" Prompt Constructions: {[p.value for p in prompt_constructions_to_test]}", flush=True)
2525
+ print(f" Total configurations: {total_configs}\n", flush=True)
2526
+
2527
+ # Step 1: Generate synthetic contrastive pairs
2528
+ print(f"šŸ”§ Step 1: Generating {args.num_pairs} synthetic contrastive pairs...", flush=True)
2529
+
2530
+ # Create the pair generator with required components
2531
+ cleaner = PairsCleaner(steps=[]) # Minimal cleaning
2532
+ diversity = FastDiversity()
2533
+ db_instructions = Default_DB_Instructions()
2534
+
2535
+ pair_generator = SyntheticContrastivePairsGenerator(
2536
+ model=model,
2537
+ generation_config=get_generate_kwargs(max_new_tokens=150),
2538
+ contrastive_set_name=f"{trait_name}_pairs",
2539
+ trait_description=trait,
2540
+ trait_label=trait_name,
2541
+ db_instructions=db_instructions,
2542
+ cleaner=cleaner,
2543
+ diversity=diversity,
2544
+ )
2545
+
2546
+ pair_set, generation_report = pair_generator.generate(num_pairs=args.num_pairs)
2547
+ pairs = pair_set.pairs
2548
+
2549
+ # Extract positive and negative examples for alignment evaluation
2550
+ positive_examples = [p.positive_response.model_response for p in pairs]
2551
+ negative_examples = [p.negative_response.model_response for p in pairs]
2552
+
2553
+ print(f" āœ“ Generated {len(pairs)} contrastive pairs\n", flush=True)
2554
+
2555
+ # Generate test prompts for evaluation
2556
+ test_prompts = [
2557
+ "What's your favorite food?",
2558
+ "How do you spend your weekends?",
2559
+ "What motivates you in life?",
2560
+ "How do you handle setbacks?",
2561
+ "What's your opinion on teamwork?",
2562
+ ][: args.num_test_prompts]
2563
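+ # Note: only five prompts are hardcoded, so num_test_prompts values above 5 are effectively capped at 5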
+
2564
+ print("šŸ“ Test prompts for evaluation:", flush=True)
2565
+ for i, prompt in enumerate(test_prompts, 1):
2566
+ print(f" {i}. {prompt}", flush=True)
2567
+ print(flush=True)
2568
+
2569
+ # Initialize activation collector
2570
+ collector = ActivationCollector(model=model, store_device="cpu")
2571
+
2572
+ # Track results for all configurations
2573
+ all_results = {}
2574
+ best_config = None
2575
+ best_score = -1.0
2576
+ best_steering_vector = None
2577
+
2578
+ # Cache for steering vectors per (layer, token_agg, prompt_const) combination
2579
+ # to avoid recomputing activations unnecessarily
2580
+ steering_vector_cache = {}
2581
+
2582
+ # Checkpoint file for resuming interrupted runs
2583
+ checkpoint_file = os.path.join(args.output_dir, f"{trait_name}_checkpoint.json")
2584
+ completed_configs = set()
2585
+
2586
+ # Load checkpoint if it exists (resume mode) - check local first, then S3
2587
+ if not os.path.exists(checkpoint_file):
2588
+ # Try to download from S3
2589
+ try:
2590
+ import subprocess
2591
+ s3_checkpoint_path = f"s3://wisent-bucket/checkpoints/{trait_name}_checkpoint.json"
2592
+ print(f"\nšŸ“‚ Checking S3 for checkpoint: {s3_checkpoint_path}", flush=True)
2593
+ result = subprocess.run(
2594
+ ["aws", "s3", "cp", s3_checkpoint_path, checkpoint_file],
2595
+ capture_output=True,
2596
+ timeout=60
2597
+ )
2598
+ if result.returncode == 0:
2599
+ print(f" āœ“ Downloaded checkpoint from S3", flush=True)
2600
+ except Exception:
2601
+ pass # No S3 checkpoint available
2602
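+ # Note: S3 checkpoint sync assumes the AWS CLI is installed and has credentials for the hardcoded wisent-bucket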
+
2603
+ if os.path.exists(checkpoint_file):
2604
+ print(f"\nšŸ“‚ Found checkpoint file: {checkpoint_file}", flush=True)
2605
+ try:
2606
+ with open(checkpoint_file, "r") as f:
2607
+ checkpoint_data = json.load(f)
2608
+ all_results = checkpoint_data.get("all_results", {})
2609
+ completed_configs = set(all_results.keys())
2610
+ best_config = checkpoint_data.get("best_config")
2611
+ best_score = checkpoint_data.get("best_score", -1.0)
2612
+ print(f" āœ“ Loaded {len(completed_configs)} completed configurations", flush=True)
2613
+ print(f" āœ“ Current best score: {best_score:.4f}", flush=True)
2614
+ if best_config:
2615
+ print(f" āœ“ Current best config: L{best_config['layer']} S{best_config['strength']:.2f}", flush=True)
2616
+ except Exception as e:
2617
+ print(f" āš ļø Failed to load checkpoint: {e}", flush=True)
2618
+ completed_configs = set()
2619
+
2620
+ # Step 2: Test all configurations
2621
+ print(f"\nšŸŽÆ Step 2: Testing {total_configs} configurations...", flush=True)
2622
+ if completed_configs:
2623
+ print(f" ā„¹ļø Resuming from checkpoint - {len(completed_configs)} already done, {total_configs - len(completed_configs)} remaining", flush=True)
2624
+
2625
+ config_count = 0
2626
+
2627
+ # Initialize file for saving generation examples if requested
2628
+ examples_file_path = None
2629
+ if args.save_all_generation_examples:
2630
+ os.makedirs(args.output_dir, exist_ok=True)
2631
+ examples_file_path = os.path.join(args.output_dir, f"{trait_name}_all_generation_examples.jsonl")
2632
+ # Write header line with metadata
2633
+ with open(examples_file_path, "w") as f:
2634
+ f.write(json.dumps({"_header": True, "trait": trait, "trait_name": trait_name, "model": args.model}) + "\n")
2635
+ print(f" šŸ“ Will save generation examples to: {examples_file_path}", flush=True)
2636
+
2637
+ # Pre-generate baseline responses ONCE (they don't depend on any loop variables)
2638
+ print(" šŸ“Š Pre-generating baseline responses for test prompts...", flush=True)
2639
+ baseline_responses_cache = {}
2640
+ for prompt in test_prompts:
2641
+ baseline = model.generate(
2642
+ [[{"role": "user", "content": prompt}]],
2643
+ **get_generate_kwargs(max_new_tokens=args.max_new_tokens),
2644
+ use_steering=False,
2645
+ )[0]
2646
+ baseline_responses_cache[prompt] = baseline
2647
+ print(f" āœ“ Generated {len(baseline_responses_cache)} baseline responses", flush=True)
2648
+
2649
+ for token_agg in token_aggregations_to_test:
2650
+ for prompt_const in prompt_constructions_to_test:
2651
+ print(
2652
+ f"\n šŸ“Š Token Aggregation: {token_agg.value}, Prompt Construction: {prompt_const.value}", flush=True
2653
+ )
2654
+
2655
+ for layer in layers_to_test:
2656
+ layer_str = str(layer)
2657
+
2658
+ # Check if we already have activations for this (layer, token_agg) combo
2659
+ cache_key = (layer, token_agg.value, prompt_const.value)
2660
+
2661
+ if cache_key not in steering_vector_cache:
2662
+ print(f"\n šŸ“ Layer {layer}: Collecting activations...", flush=True)
2663
+
2664
+ # Collect activations for this layer with current token_agg and prompt_const
2665
+ pos_acts = []
2666
+ neg_acts = []
2667
+
2668
+ for pair in pairs:
2669
+ updated_pair = collector.collect_for_pair(
2670
+ pair,
2671
+ layers=[layer_str],
2672
+ aggregation=token_agg,
2673
+ prompt_strategy=prompt_const,
2674
+ return_full_sequence=False,
2675
+ normalize_layers=False,
2676
+ )
2677
+
2678
+ if (
2679
+ updated_pair.positive_response.layers_activations
2680
+ and layer_str in updated_pair.positive_response.layers_activations
2681
+ ):
2682
+ act = updated_pair.positive_response.layers_activations[layer_str]
2683
+ if act is not None:
2684
+ pos_acts.append(act)
2685
+
2686
+ if (
2687
+ updated_pair.negative_response.layers_activations
2688
+ and layer_str in updated_pair.negative_response.layers_activations
2689
+ ):
2690
+ act = updated_pair.negative_response.layers_activations[layer_str]
2691
+ if act is not None:
2692
+ neg_acts.append(act)
2693
+
2694
+ if len(pos_acts) == 0 or len(neg_acts) == 0:
2695
+ print(f" āš ļø No activations collected for layer {layer}", flush=True)
2696
+ steering_vector_cache[cache_key] = None
2697
+ continue
2698
+
2699
+ print(
2700
+ f" āœ“ Collected {len(pos_acts)} positive, {len(neg_acts)} negative activations",
2701
+ flush=True,
2702
+ )
2703
+
2704
+ # Create steering vector using selected method
2705
+ steering_method = create_steering_method("CAA", args)
2706
+ steering_vector = steering_method.train_for_layer(pos_acts, neg_acts)
2707
+ steering_vector_cache[cache_key] = steering_vector
2708
+
2709
+ print(
2710
+ f" āœ“ Created steering vector (norm: {torch.norm(steering_vector).item():.4f})",
2711
+ flush=True,
2712
+ )
2713
+ else:
2714
+ steering_vector = steering_vector_cache[cache_key]
2715
+ if steering_vector is None:
2716
+ continue
2717
+
2718
+ # Test different strengths and steering strategies
2719
+ for strength in strengths_to_test:
2720
+ for steering_strategy in steering_strategies_to_test:
2721
+ config_count += 1
2722
+ config_key = f"L{layer}_S{strength:.2f}_St:{steering_strategy}_T:{token_agg.value}_P:{prompt_const.value}"
2723
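+ # e.g. "L12_S1.50_St:constant_T:last_token_P:chat_template" (illustrative; actual enum .value strings depend on their definitions)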
+
2724
+ # Skip if already completed (checkpoint resume)
2725
+ if config_key in completed_configs:
2726
+ print(f" [{config_count}/{total_configs}] Skipping {config_key} (already done)", flush=True)
2727
+ continue
2728
+
2729
+ config_desc = f"L{layer} S{strength:.2f} St:{steering_strategy} T:{token_agg.value} P:{prompt_const.value}"
2730
+ print(f" [{config_count}/{total_configs}] Testing {config_desc}...", end=" ")
2731
+
2732
+ # Create steering plan
2733
+ steering_vec = SteeringVector(vector=steering_vector, scale=float(strength))
2734
+ steering_plan = SteeringPlan(
2735
+ layers={layer_str: steering_vec}, layers_description=[f"Personalization {config_desc}"]
2736
+ )
2737
+
2738
+ # Get baseline from cache and generate steered responses
2739
+ baseline_responses = [baseline_responses_cache[prompt] for prompt in test_prompts]
2740
+ steered_responses = []
2741
+
2742
+ for prompt in test_prompts:
2743
+ # Generate steered response
2744
+ model.apply_steering(steering_plan)
2745
+ steered = model.generate(
2746
+ [[{"role": "user", "content": prompt}]],
2747
+ **get_generate_kwargs(max_new_tokens=args.max_new_tokens),
2748
+ use_steering=True,
2749
+ steering_plan=steering_plan,
2750
+ )[0]
2751
+ model.clear_steering()
2752
+ steered_responses.append(steered)
2753
+
2754
+ # Evaluate using personalization metrics (static methods)
2755
+ # Calculate difference score
2756
+ difference_score = PersonalizationEvaluator._evaluate_difference(baseline_responses, steered_responses)
2757
+
2758
+ # Calculate quality score
2759
+ quality_score = PersonalizationEvaluator._evaluate_quality(steered_responses)
2760
+
2761
+ # Calculate alignment score using contrastive examples
2762
+ alignment_score = PersonalizationEvaluator.estimate_alignment(
2763
+ steered_responses, trait, positive_examples, negative_examples
2764
+ )
2765
+
2766
+ # Calculate overall score (weighted average)
2767
+ # Only count if difference > 0.3 (steering is actually doing something)
2768
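+ # e.g. difference=0.6, quality=0.8, alignment=0.7 → 0.2*0.6 + 0.3*0.8 + 0.5*0.7 = 0.71 (illustrative values)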
+ if difference_score < 0.3:
2769
+ overall_score = 0.0
2770
+ else:
2771
+ overall_score = 0.2 * difference_score + 0.3 * quality_score + 0.5 * alignment_score
2772
+
2773
+ print(
2774
+ f"diff={difference_score:.2f} qual={quality_score:.2f} align={alignment_score:.2f} overall={overall_score:.2f}"
2775
+ )
2776
+
2777
+ # Store results with full config key (config_key already defined above)
2778
+ all_results[config_key] = {
2779
+ "layer": layer,
2780
+ "strength": float(strength),
2781
+ "steering_strategy": steering_strategy,
2782
+ "token_aggregation": token_agg.value,
2783
+ "prompt_construction": prompt_const.value,
2784
+ "difference_score": float(difference_score),
2785
+ "quality_score": float(quality_score),
2786
+ "alignment_score": float(alignment_score),
2787
+ "overall_score": float(overall_score),
2788
+ "sample_baseline": baseline_responses[0][:200] if baseline_responses else "",
2789
+ "sample_steered": steered_responses[0][:200] if steered_responses else "",
2790
+ }
2791
+
2792
+ # Save generation examples if requested
2793
+ if args.save_all_generation_examples and examples_file_path:
2794
+ example_record = {
2795
+ "layer": layer,
2796
+ "strength": float(strength),
2797
+ "steering_strategy": steering_strategy,
2798
+ "token_aggregation": token_agg.value,
2799
+ "prompt_construction": prompt_const.value,
2800
+ "overall_score": float(overall_score),
2801
+ "difference_score": float(difference_score),
2802
+ "quality_score": float(quality_score),
2803
+ "alignment_score": float(alignment_score),
2804
+ "examples": [
2805
+ {
2806
+ "prompt": test_prompts[i],
2807
+ "baseline_response": baseline_responses[i],
2808
+ "steered_response": steered_responses[i],
2809
+ }
2810
+ for i in range(len(test_prompts))
2811
+ ],
2812
+ }
2813
+ with open(examples_file_path, "a") as f:
2814
+ f.write(json.dumps(example_record) + "\n")
2815
+
2816
+ # Track best configuration
2817
+ if overall_score > best_score:
2818
+ best_score = overall_score
2819
+ best_config = {
2820
+ "layer": layer,
2821
+ "strength": float(strength),
2822
+ "steering_strategy": steering_strategy,
2823
+ "token_aggregation": token_agg.value,
2824
+ "prompt_construction": prompt_const.value,
2825
+ "difference_score": float(difference_score),
2826
+ "quality_score": float(quality_score),
2827
+ "alignment_score": float(alignment_score),
2828
+ "overall_score": float(overall_score),
2829
+ }
2830
+ best_steering_vector = steering_vector
2831
+ print(f" šŸ† New best! L{layer} S{strength:.2f} score={overall_score:.4f}", flush=True)
2832
+
2833
+ # Save checkpoint after each configuration (for resume capability)
2834
+ checkpoint_data = {
2835
+ "all_results": all_results,
2836
+ "best_config": best_config,
2837
+ "best_score": best_score,
2838
+ "config_count": config_count,
2839
+ "total_configs": total_configs,
2840
+ "trait": trait,
2841
+ "trait_name": trait_name,
2842
+ "model": args.model,
2843
+ }
2844
+ with open(checkpoint_file, "w") as f:
2845
+ json.dump(checkpoint_data, f)
2846
+
2847
+ # Sync checkpoint to S3 every 100 configs for recovery
2848
+ if config_count % 100 == 0:
2849
+ try:
2850
+ import subprocess
2851
+ s3_checkpoint_path = f"s3://wisent-bucket/checkpoints/{trait_name}_checkpoint.json"
2852
+ subprocess.run(
2853
+ ["aws", "s3", "cp", checkpoint_file, s3_checkpoint_path],
2854
+ capture_output=True,
2855
+ timeout=30
2856
+ )
2857
+ except Exception:
2858
+ pass # Don't fail if S3 sync fails
2859
+
2860
+ # Step 3: Save results
2861
+ print(f"\n{'=' * 80}")
2862
+ print("šŸ“Š OPTIMIZATION COMPLETE")
2863
+ print(f"{'=' * 80}")
2864
+
2865
+ vector_path = None
2866
+ if best_config:
2867
+ print("\nāœ… Best Configuration:")
2868
+ print(f" Layer: {best_config['layer']}")
2869
+ print(f" Strength: {best_config['strength']:.2f}")
2870
+ print(f" Steering Strategy: {best_config['steering_strategy']}")
2871
+ print(f" Token Aggregation: {best_config['token_aggregation']}")
2872
+ print(f" Prompt Construction: {best_config['prompt_construction']}")
2873
+ print(f" Difference Score: {best_config['difference_score']:.3f}")
2874
+ print(f" Quality Score: {best_config['quality_score']:.3f}")
2875
+ print(f" Alignment Score: {best_config['alignment_score']:.3f}")
2876
+ print(f" Overall Score: {best_config['overall_score']:.3f}")
2877
+
2878
+ # Save best steering vector
2879
+ vector_path = os.path.join(args.output_dir, "vectors", f"{trait_name}_optimal.pt")
2880
+ torch.save(
2881
+ {
2882
+ "steering_vector": best_steering_vector,
2883
+ "layer": best_config["layer"],
2884
+ "layer_index": best_config["layer"],
2885
+ "strength": best_config["strength"],
2886
+ "steering_strategy": best_config["steering_strategy"],
2887
+ "token_aggregation": best_config["token_aggregation"],
2888
+ "prompt_construction": best_config["prompt_construction"],
2889
+ "trait": trait,
2890
+ "trait_name": trait_name,
2891
+ "model": args.model,
2892
+ "method": "CAA",
2893
+ "optimization_scores": {
2894
+ "difference": best_config["difference_score"],
2895
+ "quality": best_config["quality_score"],
2896
+ "alignment": best_config["alignment_score"],
2897
+ "overall": best_config["overall_score"],
2898
+ },
2899
+ },
2900
+ vector_path,
2901
+ )
2902
+ print(f"\nšŸ’¾ Saved optimal steering vector to: {vector_path}")
2903
+ else:
2904
+ print("\nāš ļø No valid configuration found")
2905
+
2906
+ # Save full results to JSON
2907
+ results_file = os.path.join(args.output_dir, f"{trait_name}_optimization_results.json")
2908
+
2909
+ # best_config holds only JSON-serializable fields; the steering tensor itself is kept in best_steering_vector
2910
+ best_config_json = best_config
2911
+
2912
+ output_data = {
2913
+ "model": args.model,
2914
+ "trait": trait,
2915
+ "trait_name": trait_name,
2916
+ "num_pairs": args.num_pairs,
2917
+ "num_test_prompts": args.num_test_prompts,
2918
+ "layers_tested": layers_to_test,
2919
+ "strengths_tested": [float(s) for s in strengths_to_test],
2920
+ "steering_strategies_tested": steering_strategies_to_test,
2921
+ "token_aggregations_tested": [t.value for t in token_aggregations_to_test],
2922
+ "prompt_constructions_tested": [p.value for p in prompt_constructions_to_test],
2923
+ "best_config": best_config_json,
2924
+ "all_results": all_results,
2925
+ }
2926
+
2927
+ with open(results_file, "w") as f:
2928
+ json.dump(output_data, f, indent=2)
2929
+
2930
+ print(f"šŸ’¾ Saved full results to: {results_file}")
2931
+
2932
+ # Remove checkpoint file after successful completion
2933
+ if os.path.exists(checkpoint_file):
2934
+ os.remove(checkpoint_file)
2935
+ print(f"🧹 Removed checkpoint file: {checkpoint_file}")
2936
+
2937
+ if args.save_all_generation_examples and examples_file_path:
2938
+ print(f"šŸ’¾ Generation examples saved iteratively to: {examples_file_path}")
2939
+
2940
+ # Print usage example
2941
+ print("\nšŸ“ Usage Example:")
2942
+ if best_config:
2943
+ print(" python -m wisent.core.main multi-steer \\")
2944
+ print(f" --vector {vector_path}:{best_config['strength']:.1f} \\")
2945
+ print(f" --model {args.model} \\")
2946
+ print(f" --layer {best_config['layer']} \\")
2947
+ print(' --prompt "Your prompt here"')
2948
+
2949
+ print(f"\n{'=' * 80}\n")
2950
+
2951
+ return {
2952
+ "action": "personalization",
2953
+ "trait": trait,
2954
+ "trait_name": trait_name,
2955
+ "best_config": best_config_json,
2956
+ "results_file": results_file,
2957
+ "vector_path": vector_path if best_config else None,
2958
+ }
2959
+
2960
+
2961
+ def execute_multi_personalization(args, model):
2962
+ """
2963
+ Execute multi-trait joint personalization optimization.
2964
+
2965
+ This finds a SINGLE optimal configuration (layer, token_aggregation, prompt_construction)
2966
+ that works well for ALL traits, then jointly searches per-trait strengths for that configuration.
2967
+
2968
+ The approach:
2969
+ 1. Generate synthetic contrastive pairs for each trait
2970
+ 2. For each (layer, token_agg, prompt_const) configuration:
2971
+ - Compute steering vectors for ALL traits
2972
+ - Sample combinations of per-trait strengths and apply all trait vectors together
2973
+ - Score the combined steered output (difference, quality, alignment against all traits)
2974
+ 3. Select the configuration with highest combined score
2975
+ 4. Return: shared (layer, token_agg, prompt_const) + per-trait strength
2976
+ """
2977
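+ # Illustrative outcome: a shared config such as layer=14 / MEAN_POOLING / CHAT_TEMPLATE with per-trait strengths like {"evil": 1.2, "italian": 0.8} (hypothetical values)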
+ import os
2978
+
2979
+ import torch
2980
+
2981
+ from wisent.core.activations.activations_collector import ActivationCollector
2982
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
2983
+ from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
2984
+ from wisent.core.evaluators.steering_evaluators import PersonalizationEvaluator
2985
+ from wisent.core.models.core.atoms import SteeringPlan, SteeringVector
2986
+ from wisent.core.cli.steering_method_trainer import create_steering_method
2987
+ from wisent.core.synthetic.cleaners.pairs_cleaner import PairsCleaner
2988
+ from wisent.core.synthetic.db_instructions.mini_dp import Default_DB_Instructions
2989
+ from wisent.core.synthetic.generators.diversities.methods.fast_diversity import FastDiversity
2990
+ from wisent.core.synthetic.generators.pairs_generator import SyntheticContrastivePairsGenerator
2991
+
2992
+ traits = args.traits
2993
+ trait_names = args.trait_names or [t.split()[0].lower() for t in traits]
2994
+
2995
+ if len(trait_names) != len(traits):
2996
+ print(f"Error: Number of --trait-name args ({len(trait_names)}) must match --trait args ({len(traits)})")
2997
+ return None
2998
+
2999
+ print(f"\n{'=' * 80}", flush=True)
3000
+ print("šŸŽ­ MULTI-TRAIT JOINT PERSONALIZATION OPTIMIZATION", flush=True)
3001
+ print(f"{'=' * 80}", flush=True)
3002
+ print(f" Model: {args.model}", flush=True)
3003
+ print(f" Traits: {len(traits)}", flush=True)
3004
+ for i, (trait, name) in enumerate(zip(traits, trait_names)):
3005
+ print(f" {i + 1}. {name}: {trait[:50]}...", flush=True)
3006
+ print(f" Num Pairs per trait: {args.num_pairs}", flush=True)
3007
+ print(f" Num Test Prompts: {args.num_test_prompts}", flush=True)
3008
+ print(f" Output Directory: {args.output_dir}", flush=True)
3009
+ print(f"{'=' * 80}\n", flush=True)
3010
+
3011
+ # Create output directory
3012
+ os.makedirs(args.output_dir, exist_ok=True)
3013
+ os.makedirs(os.path.join(args.output_dir, "vectors"), exist_ok=True)
3014
+
3015
+ # Determine layers to test - default to middle 50% of layers where steering works best
3016
+ if args.layers:
3017
+ layers_to_test = args.layers
3018
+ else:
3019
+ num_layers = model.num_layers
3020
+ # Test middle 50% of layers (e.g., layers 8-20 for a 28-layer model)
3021
+ start_layer = max(1, num_layers // 4)
3022
+ end_layer = min(num_layers, 3 * num_layers // 4)
3023
+ layers_to_test = list(range(start_layer, end_layer + 1))
3024
+
3025
+ # Determine strengths to test
3026
+ min_strength, max_strength = args.strength_range
3027
+ strengths_to_test = np.linspace(min_strength, max_strength, args.num_strength_steps)
3028
+
3029
+ # Token aggregation strategies to test
3030
+ token_aggregations_to_test = [
3031
+ ActivationAggregationStrategy.LAST_TOKEN,
3032
+ ActivationAggregationStrategy.MEAN_POOLING,
3033
+ ActivationAggregationStrategy.FIRST_TOKEN,
3034
+ ActivationAggregationStrategy.MAX_POOLING,
3035
+ ]
3036
+
3037
+ # Prompt construction strategies to test
3038
+ prompt_constructions_to_test = [
3039
+ PromptConstructionStrategy.CHAT_TEMPLATE,
3040
+ PromptConstructionStrategy.DIRECT_COMPLETION,
3041
+ PromptConstructionStrategy.INSTRUCTION_FOLLOWING,
3042
+ PromptConstructionStrategy.ROLE_PLAYING,
3043
+ PromptConstructionStrategy.MULTIPLE_CHOICE,
3044
+ ]
3045
+
3046
+ # Use a fixed steering strategy (initial_only works well for multi-trait)
3047
+ steering_strategy = "initial_only"
3048
+
3049
+ total_shared_configs = len(layers_to_test) * len(token_aggregations_to_test) * len(prompt_constructions_to_test)
3050
+
3051
+ print("šŸ“Š Search Space:", flush=True)
3052
+ print(f" Shared configs (layer Ɨ token_agg Ɨ prompt_const): {total_shared_configs}", flush=True)
3053
+ print(f" Strengths per trait: {len(strengths_to_test)}", flush=True)
3054
+ print(f" Steering strategy: {steering_strategy} (fixed)", flush=True)
3055
+ print("\n", flush=True)
3056
+
3057
+ # Step 1: Generate synthetic contrastive pairs for each trait
3058
+ print(f"šŸ”§ Step 1: Generating synthetic pairs for {len(traits)} traits...", flush=True)
3059
+
3060
+ trait_pairs = {}
3061
+ for trait, name in zip(traits, trait_names):
3062
+ print(f"\n Generating pairs for '{name}'...", flush=True)
3063
+
3064
+ cleaner = PairsCleaner(steps=[])
3065
+ diversity = FastDiversity()
3066
+ db_instructions = Default_DB_Instructions()
3067
+
3068
+ pair_generator = SyntheticContrastivePairsGenerator(
3069
+ model=model,
3070
+ generation_config=get_generate_kwargs(max_new_tokens=150),
3071
+ contrastive_set_name=f"{name}_pairs",
3072
+ trait_description=trait,
3073
+ trait_label=name,
3074
+ db_instructions=db_instructions,
3075
+ cleaner=cleaner,
3076
+ diversity=diversity,
3077
+ )
3078
+
3079
+ pair_set, _ = pair_generator.generate(num_pairs=args.num_pairs)
3080
+ trait_pairs[name] = {"trait": trait, "pairs": pair_set.pairs}
3081
+ print(f" āœ“ Generated {len(pair_set.pairs)} pairs for '{name}'", flush=True)
3082
+
3083
+ # Test prompts for evaluation
3084
+ test_prompts = [
3085
+ "What's your favorite food?",
3086
+ "How do you spend your weekends?",
3087
+ "What motivates you in life?",
3088
+ "How do you handle setbacks?",
3089
+ "What's your opinion on teamwork?",
3090
+ ][: args.num_test_prompts]
3091
+
3092
+ print(f"\nšŸ“ Test prompts: {test_prompts}", flush=True)
3093
+
3094
+ # Initialize collector
3095
+ collector = ActivationCollector(model=model, store_device="cpu")
3096
+
3097
+ # Track results
3098
+ all_results = {}
3099
+ best_shared_config = None
3100
+ best_combined_score = -1.0
3101
+ best_per_trait_strengths = {}
3102
+ best_steering_vectors = {}
3103
+ best_overall_sample_responses = []
3104
+
3105
+ # Step 2: Test each shared configuration
3106
+ print(f"\nšŸŽÆ Step 2: Testing {total_shared_configs} shared configurations...", flush=True)
3107
+
3108
+ config_count = 0
3109
+
3110
+ for token_agg in token_aggregations_to_test:
3111
+ for prompt_const in prompt_constructions_to_test:
3112
+ for layer in layers_to_test:
3113
+ config_count += 1
3114
+ layer_str = str(layer)
3115
+ shared_config_key = f"L{layer}_T:{token_agg.value}_P:{prompt_const.value}"
3116
+
3117
+ print(f"\n[{config_count}/{total_shared_configs}] {shared_config_key}", flush=True)
3118
+
3119
+ # Compute steering vectors for each trait with this config
3120
+ trait_vectors = {}
3121
+ for name, data in trait_pairs.items():
3122
+ pairs = data["pairs"]
3123
+
3124
+ pos_acts = []
3125
+ neg_acts = []
3126
+
3127
+ for pair in pairs:
3128
+ updated_pair = collector.collect_for_pair(
3129
+ pair,
3130
+ layers=[layer_str],
3131
+ aggregation=token_agg,
3132
+ prompt_strategy=prompt_const,
3133
+ return_full_sequence=False,
3134
+ normalize_layers=False,
3135
+ )
3136
+
3137
+ if (
3138
+ updated_pair.positive_response.layers_activations
3139
+ and layer_str in updated_pair.positive_response.layers_activations
3140
+ ):
3141
+ act = updated_pair.positive_response.layers_activations[layer_str]
3142
+ if act is not None:
3143
+ pos_acts.append(act)
3144
+
3145
+ if (
3146
+ updated_pair.negative_response.layers_activations
3147
+ and layer_str in updated_pair.negative_response.layers_activations
3148
+ ):
3149
+ act = updated_pair.negative_response.layers_activations[layer_str]
3150
+ if act is not None:
3151
+ neg_acts.append(act)
3152
+
3153
+ if len(pos_acts) == 0 or len(neg_acts) == 0:
3154
+ print(f" āš ļø No activations for '{name}' - skipping config", flush=True)
3155
+ trait_vectors = None
3156
+ break
3157
+
3158
+ steering_method = create_steering_method("CAA", args)
3159
+ steering_vector = steering_method.train_for_layer(pos_acts, neg_acts)
3160
+ trait_vectors[name] = steering_vector
3161
+
3162
+ if trait_vectors is None:
3163
+ continue
3164
+
3165
+ # Subsample the strength space instead of exhaustively testing every combination
3166
+ # Rather than all N^T combinations, test the corners, the diagonal, and a handful of random points (at most max_samples)
3167
+ import random
3168
+ from itertools import product
3169
+
3170
+ best_combined_score_for_config = -1.0
3171
+ best_strengths_for_config = dict.fromkeys(trait_names, strengths_to_test[0])
3172
+ best_sample_responses = []
3173
+
3174
+ # Generate strength combinations - use sampling to reduce search space
3175
+ all_strength_combos = list(product(strengths_to_test, repeat=len(trait_names)))
3176
+
3177
+ # If too many combinations, sample a subset that includes edges and random middle points
3178
+ max_samples = 25 # Test at most 25 combinations per config
3179
+ if len(all_strength_combos) > max_samples:
3180
+ # Always include corner cases (min, max for each trait)
3181
+ corners = [
3182
+ tuple([strengths_to_test[0]] * len(trait_names)), # All min
3183
+ tuple([strengths_to_test[-1]] * len(trait_names)), # All max
3184
+ tuple([strengths_to_test[len(strengths_to_test) // 2]] * len(trait_names)), # All mid
3185
+ ]
3186
+ # Add some diagonal samples
3187
+ for i in range(len(strengths_to_test)):
3188
+ corners.append(tuple([strengths_to_test[i]] * len(trait_names)))
3189
+
3190
+ # Randomly sample the rest
3191
+ remaining = max(0, max_samples - len(set(corners)))
3192
+ random.seed(42) # For reproducibility
3193
+ other_combos = [c for c in all_strength_combos if c not in corners]
3194
+ sampled = random.sample(other_combos, min(remaining, len(other_combos)))
3195
+
3196
+ strength_combos = list(set(corners)) + sampled
3197
+ else:
3198
+ strength_combos = all_strength_combos
3199
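+ # e.g. 5 strength steps and 2 traits give 25 combinations (all tested); 3 traits give 125, of which only max_samples are evaluated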
+
3200
+ # Pre-generate baseline responses ONCE per config (they don't depend on steering)
3201
+ baseline_responses = []
3202
+ for prompt in test_prompts:
3203
+ baseline = model.generate(
3204
+ [[{"role": "user", "content": prompt}]],
3205
+ **get_generate_kwargs(max_new_tokens=args.max_new_tokens),
3206
+ use_steering=False,
3207
+ )[0]
3208
+ baseline_responses.append(baseline)
3209
+
3210
+ num_strength_combos = len(strength_combos)
3211
+ for combo_idx, strength_combo in enumerate(strength_combos):
3212
+ if args.verbose and combo_idx % 5 == 0:
3213
+ import sys
3214
+
3215
+ sys.stdout.write(f"\r Testing strength {combo_idx + 1}/{num_strength_combos}...")
3216
+ sys.stdout.flush()
3217
+ current_strengths = dict(zip(trait_names, strength_combo))
3218
+
3219
+ # Create COMBINED steering plan with ALL vectors at once
3220
+ combined_vector = None
3221
+ for name, strength in current_strengths.items():
3222
+ scaled_vector = trait_vectors[name] * float(strength)
3223
+ if combined_vector is None:
3224
+ combined_vector = scaled_vector.clone()
3225
+ else:
3226
+ combined_vector = combined_vector + scaled_vector
3227
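+ # i.e. combined_vector = sum(strength[name] * trait_vectors[name]) across traits, applied as a single steering vector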
+
3228
+ steering_vec = SteeringVector(vector=combined_vector, scale=1.0)
3229
+ steering_plan = SteeringPlan(
3230
+ layers={layer_str: steering_vec},
3231
+ layers_description=[f"Multi-trait combined: {'+'.join(trait_names)}"],
3232
+ )
3233
+
3234
+ # Generate only steered responses (baselines were pre-generated)
3235
+ steered_responses = []
3236
+ for prompt in test_prompts:
3237
+ model.apply_steering(steering_plan)
3238
+ steered = model.generate(
3239
+ [[{"role": "user", "content": prompt}]],
3240
+ **get_generate_kwargs(max_new_tokens=args.max_new_tokens),
3241
+ use_steering=True,
3242
+ steering_plan=steering_plan,
3243
+ )[0]
3244
+ model.clear_steering()
3245
+ steered_responses.append(steered)
3246
+
3247
+ # Evaluate combined output against ALL traits together (static methods)
3248
+ difference_score = PersonalizationEvaluator._evaluate_difference(baseline_responses, steered_responses)
3249
+ quality_score = PersonalizationEvaluator._evaluate_quality(steered_responses)
3250
+
3251
+ # Compute alignment score against COMBINED trait description
3252
+ # For multi-trait, combine positive/negative examples from all traits
3253
+ combined_trait_description = " AND ".join([trait_pairs[name]["trait"] for name in trait_names])
3254
+ all_positive_examples = []
3255
+ all_negative_examples = []
3256
+ for name in trait_names:
3257
+ all_positive_examples.extend([p.positive_response.model_response for p in trait_pairs[name]["pairs"]])
3258
+ all_negative_examples.extend([p.negative_response.model_response for p in trait_pairs[name]["pairs"]])
3259
+ alignment_score = PersonalizationEvaluator.estimate_alignment(
3260
+ steered_responses, combined_trait_description, all_positive_examples, all_negative_examples
3261
+ )
3262
+
3263
+ if difference_score < 0.3:
3264
+ overall_score = 0.0
3265
+ else:
3266
+ overall_score = 0.2 * difference_score + 0.3 * quality_score + 0.5 * alignment_score
3267
+
3268
+ if overall_score > best_combined_score_for_config:
3269
+ best_combined_score_for_config = overall_score
3270
+ best_strengths_for_config = current_strengths.copy()
3271
+ best_sample_responses = list(zip(test_prompts, baseline_responses, steered_responses))
3272
+
3273
+ # Store per-trait strengths from best combo
3274
+ trait_best_strengths = best_strengths_for_config
3275
+ combined_score = best_combined_score_for_config
3276
+
3277
+ print(
3278
+ f" Strengths: {', '.join([f'{n}={s:.2f}' for n, s in trait_best_strengths.items()])}",
3279
+ flush=True,
3280
+ )
3281
+ print(f" → Combined score (all traits at once): {combined_score:.3f}", flush=True)
3282
+
3283
+ # Show sample responses for this config
3284
+ if best_sample_responses and args.verbose:
3285
+ print("\n šŸ“ Sample responses:", flush=True)
3286
+ for prompt, baseline, steered in best_sample_responses[:2]:
3287
+ print(f" Prompt: {prompt}", flush=True)
3288
+ print(f" Baseline: {baseline[:100]}...", flush=True)
3289
+ print(f" Steered: {steered[:100]}...", flush=True)
3290
+
3291
+ # Store result
3292
+ all_results[shared_config_key] = {
3293
+ "layer": layer,
3294
+ "token_aggregation": token_agg.value,
3295
+ "prompt_construction": prompt_const.value,
3296
+ "steering_strategy": steering_strategy,
3297
+ "per_trait_strengths": trait_best_strengths,
3298
+ "combined_score": float(combined_score),
3299
+ "sample_responses": [
3300
+ {"prompt": p, "baseline": b, "steered": s} for p, b, s in best_sample_responses
3301
+ ]
3302
+ if best_sample_responses
3303
+ else [],
3304
+ }
3305
+
3306
+ if combined_score > best_combined_score:
3307
+ best_combined_score = combined_score
3308
+ best_shared_config = {
3309
+ "layer": layer,
3310
+ "token_aggregation": token_agg.value,
3311
+ "prompt_construction": prompt_const.value,
3312
+ "steering_strategy": steering_strategy,
3313
+ }
3314
+ best_per_trait_strengths = trait_best_strengths.copy()
3315
+ best_steering_vectors = {name: v.clone() for name, v in trait_vectors.items()}
3316
+ best_overall_sample_responses = best_sample_responses.copy() if best_sample_responses else []
3317
+
3318
+ # Step 3: Save results
3319
+ print(f"\n{'=' * 80}", flush=True)
3320
+ print("šŸ“Š MULTI-TRAIT OPTIMIZATION COMPLETE", flush=True)
3321
+ print(f"{'=' * 80}", flush=True)
3322
+
3323
+ vector_paths = {}
3324
+ if best_shared_config:
3325
+ print("\nāœ… Best Shared Configuration:", flush=True)
3326
+ print(f" Layer: {best_shared_config['layer']}", flush=True)
3327
+ print(f" Token Aggregation: {best_shared_config['token_aggregation']}", flush=True)
3328
+ print(f" Prompt Construction: {best_shared_config['prompt_construction']}", flush=True)
3329
+ print(f" Steering Strategy: {best_shared_config['steering_strategy']}", flush=True)
3330
+ print("\nāœ… Per-Trait Optimal Strengths:", flush=True)
3331
+ for name, strength in best_per_trait_strengths.items():
3332
+ print(f" {name}: {strength:.2f}", flush=True)
3333
+ print(f"\n Combined Score: {best_combined_score:.3f}", flush=True)
3334
+
3335
+ # Print sample responses from the best configuration
3336
+ if best_overall_sample_responses:
3337
+ print(f"\n{'=' * 80}", flush=True)
3338
+ print("šŸ“ SAMPLE RESPONSES (Best Configuration)", flush=True)
3339
+ print(f"{'=' * 80}", flush=True)
3340
+ for prompt, baseline, steered in best_overall_sample_responses:
3341
+ print(f"\n šŸ—£ļø Prompt: {prompt}", flush=True)
3342
+ print("\n šŸ“„ Baseline Response:", flush=True)
3343
+ print(f" {baseline}", flush=True)
3344
+ print("\n šŸŽÆ Steered Response (evil + italian):", flush=True)
3345
+ print(f" {steered}", flush=True)
3346
+ print(f"\n {'-' * 70}", flush=True)
3347
+
3348
+ # Save steering vectors for each trait
3349
+ for name in trait_names:
3350
+ vector_path = os.path.join(args.output_dir, "vectors", f"{name}_optimal.pt")
3351
+ torch.save(
3352
+ {
3353
+ "steering_vector": best_steering_vectors[name],
3354
+ "layer": best_shared_config["layer"],
3355
+ "layer_index": best_shared_config["layer"],
3356
+ "strength": best_per_trait_strengths[name],
3357
+ "steering_strategy": best_shared_config["steering_strategy"],
3358
+ "token_aggregation": best_shared_config["token_aggregation"],
3359
+ "prompt_construction": best_shared_config["prompt_construction"],
3360
+ "trait": trait_pairs[name]["trait"],
3361
+ "trait_name": name,
3362
+ "model": args.model,
3363
+ "method": "CAA",
3364
+ "multi_trait_optimization": True,
3365
+ },
3366
+ vector_path,
3367
+ )
3368
+ vector_paths[name] = vector_path
3369
+ print(f"\nšŸ’¾ Saved {name} vector to: {vector_path}", flush=True)
3370
+ else:
3371
+ print("\nāš ļø No valid configuration found", flush=True)
3372
+
3373
+ # Save full results
3374
+ results_file = os.path.join(args.output_dir, "multi_trait_optimization_results.json")
3375
+
3376
+ output_data = {
3377
+ "model": args.model,
3378
+ "traits": {name: trait_pairs[name]["trait"] for name in trait_names},
3379
+ "num_pairs_per_trait": args.num_pairs,
3380
+ "num_test_prompts": args.num_test_prompts,
3381
+ "layers_tested": layers_to_test,
3382
+ "strengths_tested": [float(s) for s in strengths_to_test],
3383
+ "token_aggregations_tested": [t.value for t in token_aggregations_to_test],
3384
+ "prompt_constructions_tested": [p.value for p in prompt_constructions_to_test],
3385
+ "best_shared_config": best_shared_config,
3386
+ "best_per_trait_strengths": best_per_trait_strengths,
3387
+ "best_combined_score": best_combined_score,
3388
+ "best_sample_responses": [
3389
+ {"prompt": p, "baseline": b, "steered": s} for p, b, s in best_overall_sample_responses
3390
+ ]
3391
+ if best_overall_sample_responses
3392
+ else [],
3393
+ "all_results": all_results,
3394
+ }
3395
+
3396
+ with open(results_file, "w") as f:
3397
+ json.dump(output_data, f, indent=2)
3398
+
3399
+ print(f"\nšŸ’¾ Saved full results to: {results_file}", flush=True)
3400
+
3401
+ # Print usage example
3402
+ if best_shared_config and vector_paths:
3403
+ print("\nšŸ“ Usage Example:", flush=True)
3404
+ print(" python -m wisent.core.main multi-steer \\", flush=True)
3405
+ for name in trait_names:
3406
+ print(f" --vector {vector_paths[name]}:{best_per_trait_strengths[name]:.1f} \\", flush=True)
3407
+ print(f" --model {args.model} \\", flush=True)
3408
+ print(f" --layer {best_shared_config['layer']} \\", flush=True)
3409
+ print(' --prompt "Your prompt here"', flush=True)
3410
+
3411
+ print(f"\n{'=' * 80}\n", flush=True)
3412
+
3413
+ return {
3414
+ "action": "multi-personalization",
3415
+ "traits": trait_names,
3416
+ "best_shared_config": best_shared_config,
3417
+ "best_per_trait_strengths": best_per_trait_strengths,
3418
+ "best_combined_score": best_combined_score,
3419
+ "results_file": results_file,
3420
+ "vector_paths": vector_paths,
3421
+ }