wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (725) hide show
  1. wisent/__init__.py +1 -1
  2. wisent/core/activations/__init__.py +22 -6
  3. wisent/core/activations/activations.py +21 -39
  4. wisent/core/activations/activations_collector.py +141 -373
  5. wisent/core/activations/classifier_inference_strategy.py +194 -0
  6. wisent/core/activations/core/atoms.py +8 -92
  7. wisent/core/activations/extraction_strategy.py +308 -0
  8. wisent/core/agent/diagnose/response_diagnostics.py +3 -3
  9. wisent/core/agent/diagnose.py +3 -3
  10. wisent/core/autonomous_agent.py +2 -2
  11. wisent/core/cli/agent/apply_steering.py +23 -27
  12. wisent/core/cli/agent/evaluate_response.py +18 -20
  13. wisent/core/cli/agent/train_classifier.py +18 -20
  14. wisent/core/cli/cluster_benchmarks.py +472 -0
  15. wisent/core/cli/create_steering_vector.py +13 -5
  16. wisent/core/cli/generate_vector_from_task.py +4 -0
  17. wisent/core/cli/get_activations.py +12 -36
  18. wisent/core/cli/method_optimizer.py +859 -0
  19. wisent/core/cli/optimize.py +44 -5
  20. wisent/core/cli/optimize_classification.py +5 -6
  21. wisent/core/cli/optimize_sample_size.py +8 -22
  22. wisent/core/cli/optimize_steering.py +429 -153
  23. wisent/core/cli/optimize_weights.py +65 -6
  24. wisent/core/cli/steering_method_trainer.py +5 -4
  25. wisent/core/cli/steering_search_space.py +20 -15
  26. wisent/core/cli/tasks.py +14 -43
  27. wisent/core/cli/train_unified_goodness.py +17 -18
  28. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +1578 -173
  29. wisent/core/contrastive_pairs/diagnostics/linearity.py +63 -80
  30. wisent/core/contrastive_pairs/diagnostics/vector_quality.py +6 -5
  31. wisent/core/contrastive_pairs/huggingface_pairs/hf_extractor_manifest.py +5 -19
  32. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/__init__.py +11 -5
  33. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/apps.py +146 -32
  34. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue.py +2 -2
  35. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/humaneval.py +98 -57
  36. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/code_x_glue.py +8 -8
  37. wisent/core/contrastive_pairs/lm_eval_pairs/group_task_manifests/freebase.py +1 -1
  38. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -5
  39. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/agieval_aqua_rat.py +129 -0
  40. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/code_x_glue.py +11 -6
  41. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +1 -1
  42. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mbpp.py +47 -6
  43. wisent/core/evaluators/benchmark_specific/apps_evaluator.py +133 -0
  44. wisent/core/evaluators/benchmark_specific/coding/metrics/evaluator.py +6 -1
  45. wisent/core/evaluators/benchmark_specific/conala_evaluator.py +31 -168
  46. wisent/core/evaluators/custom/examples/humanization_coherent.py +89 -35
  47. wisent/core/evaluators/oracles/truthfulqa_gen_evaluator.py +2 -20
  48. wisent/core/evaluators/personalization/coherence.py +46 -0
  49. wisent/core/hyperparameter_optimizer.py +13 -13
  50. wisent/core/lm_eval_harness_ground_truth.py +7 -11
  51. wisent/core/main.py +3 -0
  52. wisent/core/models/wisent_model.py +8 -7
  53. wisent/core/opti/methods/opti_weights.py +29 -2
  54. wisent/core/optuna/classifier/activation_generator.py +14 -12
  55. wisent/core/optuna/steering/steering_optimization.py +14 -9
  56. wisent/core/parser_arguments/cluster_benchmarks_parser.py +31 -0
  57. wisent/core/parser_arguments/generate_vector_from_task_parser.py +20 -0
  58. wisent/core/parser_arguments/main_parser.py +8 -0
  59. wisent/core/parser_arguments/optimize_steering_parser.py +117 -10
  60. wisent/core/parser_arguments/optimize_weights_parser.py +6 -0
  61. wisent/core/parser_arguments/tasks_parser.py +7 -19
  62. wisent/core/steering_methods/core/atoms.py +1 -2
  63. wisent/core/steering_methods/methods/caa.py +1 -1
  64. wisent/core/steering_methods/methods/hyperplane.py +74 -0
  65. wisent/core/steering_methods/methods/prism.py +1 -2
  66. wisent/core/steering_methods/methods/pulse.py +39 -8
  67. wisent/core/steering_methods/methods/titan.py +59 -14
  68. wisent/core/steering_methods/registry.py +52 -12
  69. wisent/core/steering_optimizer.py +15 -15
  70. wisent/core/trainers/steering_trainer.py +9 -18
  71. wisent/parameters/lm_eval/track_progress_not_lm_eval_tasks.json +19 -70
  72. wisent/scripts/run_quality_metrics_sweep.sh +22 -27
  73. wisent/tests/test_aggregation_geometry.py +236 -0
  74. wisent/tests/test_detector_accuracy.py +163 -0
  75. wisent/tests/test_geometry_exhaustive.py +1202 -0
  76. wisent/tests/visualize_geometry.py +255 -61
  77. {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/METADATA +1 -1
  78. {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/RECORD +82 -714
  79. wisent/core/activations/prompt_construction_strategy.py +0 -47
  80. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text.py +0 -15
  81. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_go.py +0 -64
  82. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_java.py +0 -65
  83. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_javascript.py +0 -65
  84. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_php.py +0 -65
  85. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_python.py +0 -65
  86. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_ruby.py +0 -65
  87. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/freebase.py +0 -99
  88. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/instruct_humaneval.py +0 -180
  89. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/instructhumaneval.py +0 -129
  90. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mbpp.py +0 -142
  91. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/agieval.py +0 -155
  92. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/code2text.py +0 -161
  93. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/codexglue.py +0 -107
  94. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livemathbench.py +0 -155
  95. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/polymath.py +0 -155
  96. wisent/examples/scripts/results/benchmark_descriptions.json +0 -1244
  97. wisent/examples/scripts/results/benchmark_evaluation_methods.json +0 -66
  98. wisent/examples/scripts/results/benchmark_evaluator_mapping.json +0 -2781
  99. wisent/examples/scripts/results/benchmark_evaluator_mapping_updated.json +0 -30536
  100. wisent/examples/scripts/results/benchmark_evaluators_clean.json +0 -469
  101. wisent/examples/scripts/results/benchmark_methods_summary.json +0 -260
  102. wisent/examples/scripts/results/benchmark_pair_creation_methods.json +0 -66
  103. wisent/examples/scripts/results/benchmark_pair_totals.json +0 -269
  104. wisent/examples/scripts/results/benchmark_tags.json +0 -917
  105. wisent/examples/scripts/results/benchmark_test_summary_nov4.json +0 -71
  106. wisent/examples/scripts/results/coding_benchmarks_test_code_status.json +0 -150
  107. wisent/examples/scripts/results/failing_benchmarks.json +0 -946
  108. wisent/examples/scripts/results/failing_benchmarks_list.json +0 -41
  109. wisent/examples/scripts/results/failing_benchmarks_test_results.json +0 -945
  110. wisent/examples/scripts/results/missing_benchmark_tags.json +0 -341
  111. wisent/examples/scripts/results/test_20_newsgroups_evaluation.json +0 -30
  112. wisent/examples/scripts/results/test_20_newsgroups_pairs.json +0 -8
  113. wisent/examples/scripts/results/test_AraDICE_evaluation.json +0 -51
  114. wisent/examples/scripts/results/test_AraDICE_pairs.json +0 -14
  115. wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_evaluation.json +0 -30
  116. wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_pairs.json +0 -8
  117. wisent/examples/scripts/results/test_ArabCulture_evaluation.json +0 -51
  118. wisent/examples/scripts/results/test_ArabCulture_pairs.json +0 -14
  119. wisent/examples/scripts/results/test_Tag_evaluation.json +0 -30
  120. wisent/examples/scripts/results/test_Tag_pairs.json +0 -8
  121. wisent/examples/scripts/results/test_aclue_evaluation.json +0 -51
  122. wisent/examples/scripts/results/test_aclue_pairs.json +0 -14
  123. wisent/examples/scripts/results/test_acp_bench_evaluation.json +0 -51
  124. wisent/examples/scripts/results/test_acp_bench_hard_evaluation.json +0 -51
  125. wisent/examples/scripts/results/test_acp_bench_hard_pairs.json +0 -14
  126. wisent/examples/scripts/results/test_acp_bench_pairs.json +0 -14
  127. wisent/examples/scripts/results/test_advanced_ai_risk_evaluation.json +0 -51
  128. wisent/examples/scripts/results/test_advanced_ai_risk_pairs.json +0 -14
  129. wisent/examples/scripts/results/test_aexams_evaluation.json +0 -51
  130. wisent/examples/scripts/results/test_aexams_pairs.json +0 -14
  131. wisent/examples/scripts/results/test_afrimgsm_direct_amh_evaluation.json +0 -30
  132. wisent/examples/scripts/results/test_afrimgsm_direct_amh_pairs.json +0 -8
  133. wisent/examples/scripts/results/test_afrimmlu_direct_amh_evaluation.json +0 -30
  134. wisent/examples/scripts/results/test_afrimmlu_direct_amh_pairs.json +0 -8
  135. wisent/examples/scripts/results/test_afrixnli_en_direct_amh_evaluation.json +0 -30
  136. wisent/examples/scripts/results/test_afrixnli_en_direct_amh_pairs.json +0 -8
  137. wisent/examples/scripts/results/test_ag_news_evaluation.json +0 -30
  138. wisent/examples/scripts/results/test_ag_news_pairs.json +0 -8
  139. wisent/examples/scripts/results/test_agieval_evaluation.json +0 -51
  140. wisent/examples/scripts/results/test_agieval_pairs.json +0 -14
  141. wisent/examples/scripts/results/test_aime2024_evaluation.json +0 -30
  142. wisent/examples/scripts/results/test_aime2024_pairs.json +0 -8
  143. wisent/examples/scripts/results/test_aime2025_evaluation.json +0 -30
  144. wisent/examples/scripts/results/test_aime2025_pairs.json +0 -8
  145. wisent/examples/scripts/results/test_aime_evaluation.json +0 -30
  146. wisent/examples/scripts/results/test_aime_pairs.json +0 -8
  147. wisent/examples/scripts/results/test_anagrams1_evaluation.json +0 -30
  148. wisent/examples/scripts/results/test_anagrams1_pairs.json +0 -8
  149. wisent/examples/scripts/results/test_anagrams2_evaluation.json +0 -30
  150. wisent/examples/scripts/results/test_anagrams2_pairs.json +0 -8
  151. wisent/examples/scripts/results/test_anli_evaluation.json +0 -30
  152. wisent/examples/scripts/results/test_anli_pairs.json +0 -8
  153. wisent/examples/scripts/results/test_apps_evaluation.json +0 -30
  154. wisent/examples/scripts/results/test_apps_pairs.json +0 -8
  155. wisent/examples/scripts/results/test_arabic_exams_evaluation.json +0 -30
  156. wisent/examples/scripts/results/test_arabic_exams_pairs.json +0 -8
  157. wisent/examples/scripts/results/test_arabic_leaderboard_complete_evaluation.json +0 -51
  158. wisent/examples/scripts/results/test_arabic_leaderboard_complete_pairs.json +0 -14
  159. wisent/examples/scripts/results/test_arabic_leaderboard_light_evaluation.json +0 -51
  160. wisent/examples/scripts/results/test_arabic_leaderboard_light_pairs.json +0 -14
  161. wisent/examples/scripts/results/test_arabicmmlu_evaluation.json +0 -51
  162. wisent/examples/scripts/results/test_arabicmmlu_pairs.json +0 -14
  163. wisent/examples/scripts/results/test_aradice/test_aradice_evaluation.json +0 -51
  164. wisent/examples/scripts/results/test_aradice/test_aradice_pairs.json +0 -14
  165. wisent/examples/scripts/results/test_aradice3/test_aradice_evaluation.json +0 -51
  166. wisent/examples/scripts/results/test_aradice3/test_aradice_pairs.json +0 -14
  167. wisent/examples/scripts/results/test_arc_ar_evaluation.json +0 -30
  168. wisent/examples/scripts/results/test_arc_ar_pairs.json +0 -8
  169. wisent/examples/scripts/results/test_arc_challenge_evaluation.json +0 -30
  170. wisent/examples/scripts/results/test_arc_challenge_pairs.json +0 -8
  171. wisent/examples/scripts/results/test_arc_easy_evaluation.json +0 -30
  172. wisent/examples/scripts/results/test_arc_easy_pairs.json +0 -8
  173. wisent/examples/scripts/results/test_argument_topic_evaluation.json +0 -30
  174. wisent/examples/scripts/results/test_argument_topic_pairs.json +0 -8
  175. wisent/examples/scripts/results/test_arithmetic_evaluation.json +0 -51
  176. wisent/examples/scripts/results/test_arithmetic_pairs.json +0 -14
  177. wisent/examples/scripts/results/test_asdiv_evaluation.json +0 -30
  178. wisent/examples/scripts/results/test_asdiv_pairs.json +0 -8
  179. wisent/examples/scripts/results/test_assin_entailment_evaluation.json +0 -30
  180. wisent/examples/scripts/results/test_assin_entailment_pairs.json +0 -8
  181. wisent/examples/scripts/results/test_atis_evaluation.json +0 -30
  182. wisent/examples/scripts/results/test_atis_pairs.json +0 -8
  183. wisent/examples/scripts/results/test_babi_evaluation.json +0 -30
  184. wisent/examples/scripts/results/test_babi_pairs.json +0 -8
  185. wisent/examples/scripts/results/test_babilong_evaluation.json +0 -30
  186. wisent/examples/scripts/results/test_babilong_pairs.json +0 -8
  187. wisent/examples/scripts/results/test_bangla_mmlu_evaluation.json +0 -30
  188. wisent/examples/scripts/results/test_bangla_mmlu_pairs.json +0 -8
  189. wisent/examples/scripts/results/test_banking77_evaluation.json +0 -30
  190. wisent/examples/scripts/results/test_banking77_pairs.json +0 -8
  191. wisent/examples/scripts/results/test_basque/test_basque-glue_pairs.json +0 -14
  192. wisent/examples/scripts/results/test_basque-glue_evaluation.json +0 -51
  193. wisent/examples/scripts/results/test_basque-glue_pairs.json +0 -14
  194. wisent/examples/scripts/results/test_basque2/test_basque-glue_evaluation.json +0 -51
  195. wisent/examples/scripts/results/test_basque2/test_basque-glue_pairs.json +0 -14
  196. wisent/examples/scripts/results/test_basque_bench_evaluation.json +0 -51
  197. wisent/examples/scripts/results/test_basque_bench_pairs.json +0 -14
  198. wisent/examples/scripts/results/test_basque_glue/test_basque-glue_evaluation.json +0 -51
  199. wisent/examples/scripts/results/test_basque_glue/test_basque-glue_pairs.json +0 -14
  200. wisent/examples/scripts/results/test_basqueglue_evaluation.json +0 -51
  201. wisent/examples/scripts/results/test_basqueglue_pairs.json +0 -14
  202. wisent/examples/scripts/results/test_bbh_evaluation.json +0 -51
  203. wisent/examples/scripts/results/test_bbh_pairs.json +0 -14
  204. wisent/examples/scripts/results/test_bbq_evaluation.json +0 -30
  205. wisent/examples/scripts/results/test_bbq_pairs.json +0 -8
  206. wisent/examples/scripts/results/test_bec2016eu_evaluation.json +0 -51
  207. wisent/examples/scripts/results/test_bec2016eu_pairs.json +0 -14
  208. wisent/examples/scripts/results/test_belebele_evaluation.json +0 -51
  209. wisent/examples/scripts/results/test_belebele_pairs.json +0 -14
  210. wisent/examples/scripts/results/test_benchmarks_evaluation.json +0 -51
  211. wisent/examples/scripts/results/test_benchmarks_pairs.json +0 -14
  212. wisent/examples/scripts/results/test_bertaqa_evaluation.json +0 -51
  213. wisent/examples/scripts/results/test_bertaqa_pairs.json +0 -14
  214. wisent/examples/scripts/results/test_bhtc_v2_evaluation.json +0 -30
  215. wisent/examples/scripts/results/test_bhtc_v2_pairs.json +0 -8
  216. wisent/examples/scripts/results/test_bigbench_evaluation.json +0 -51
  217. wisent/examples/scripts/results/test_bigbench_pairs.json +0 -14
  218. wisent/examples/scripts/results/test_blimp_evaluation.json +0 -51
  219. wisent/examples/scripts/results/test_blimp_pairs.json +0 -14
  220. wisent/examples/scripts/results/test_boolq/test_boolq_evaluation.json +0 -30
  221. wisent/examples/scripts/results/test_boolq/test_boolq_pairs.json +0 -8
  222. wisent/examples/scripts/results/test_boolq-seq2seq_evaluation.json +0 -30
  223. wisent/examples/scripts/results/test_boolq-seq2seq_pairs.json +0 -8
  224. wisent/examples/scripts/results/test_boolq_evaluation.json +0 -30
  225. wisent/examples/scripts/results/test_boolq_pairs.json +0 -8
  226. wisent/examples/scripts/results/test_c4_evaluation.json +0 -30
  227. wisent/examples/scripts/results/test_c4_pairs.json +0 -8
  228. wisent/examples/scripts/results/test_cabreu_evaluation.json +0 -30
  229. wisent/examples/scripts/results/test_cabreu_pairs.json +0 -8
  230. wisent/examples/scripts/results/test_careqa_evaluation.json +0 -30
  231. wisent/examples/scripts/results/test_careqa_pairs.json +0 -8
  232. wisent/examples/scripts/results/test_catalan_bench_evaluation.json +0 -51
  233. wisent/examples/scripts/results/test_catalan_bench_pairs.json +0 -14
  234. wisent/examples/scripts/results/test_catalanqa_evaluation.json +0 -30
  235. wisent/examples/scripts/results/test_catalanqa_pairs.json +0 -8
  236. wisent/examples/scripts/results/test_catcola_evaluation.json +0 -30
  237. wisent/examples/scripts/results/test_catcola_pairs.json +0 -8
  238. wisent/examples/scripts/results/test_cb_evaluation.json +0 -30
  239. wisent/examples/scripts/results/test_cb_pairs.json +0 -8
  240. wisent/examples/scripts/results/test_ceval/test_ceval_evaluation.json +0 -51
  241. wisent/examples/scripts/results/test_ceval/test_ceval_pairs.json +0 -14
  242. wisent/examples/scripts/results/test_ceval_accountant/test_ceval-valid_accountant_evaluation.json +0 -30
  243. wisent/examples/scripts/results/test_ceval_accountant/test_ceval-valid_accountant_pairs.json +0 -8
  244. wisent/examples/scripts/results/test_ceval_evaluation.json +0 -51
  245. wisent/examples/scripts/results/test_ceval_pairs.json +0 -14
  246. wisent/examples/scripts/results/test_ceval_valid/test_ceval_valid_evaluation.json +0 -51
  247. wisent/examples/scripts/results/test_ceval_valid/test_ceval_valid_pairs.json +0 -14
  248. wisent/examples/scripts/results/test_chain_of_thought_evaluation.json +0 -51
  249. wisent/examples/scripts/results/test_chain_of_thought_pairs.json +0 -14
  250. wisent/examples/scripts/results/test_chartqa_evaluation.json +0 -30
  251. wisent/examples/scripts/results/test_chartqa_pairs.json +0 -8
  252. wisent/examples/scripts/results/test_claim_stance_topic_evaluation.json +0 -30
  253. wisent/examples/scripts/results/test_claim_stance_topic_pairs.json +0 -8
  254. wisent/examples/scripts/results/test_cmmlu_evaluation.json +0 -51
  255. wisent/examples/scripts/results/test_cmmlu_pairs.json +0 -14
  256. wisent/examples/scripts/results/test_cnn_dailymail_evaluation.json +0 -30
  257. wisent/examples/scripts/results/test_cnn_dailymail_pairs.json +0 -8
  258. wisent/examples/scripts/results/test_cocoteros_es_evaluation.json +0 -30
  259. wisent/examples/scripts/results/test_cocoteros_es_pairs.json +0 -8
  260. wisent/examples/scripts/results/test_codexglue_code_to_text_go_evaluation.json +0 -30
  261. wisent/examples/scripts/results/test_codexglue_code_to_text_go_pairs.json +0 -8
  262. wisent/examples/scripts/results/test_codexglue_code_to_text_java_evaluation.json +0 -30
  263. wisent/examples/scripts/results/test_codexglue_code_to_text_java_pairs.json +0 -8
  264. wisent/examples/scripts/results/test_codexglue_code_to_text_javascript_evaluation.json +0 -30
  265. wisent/examples/scripts/results/test_codexglue_code_to_text_javascript_pairs.json +0 -8
  266. wisent/examples/scripts/results/test_codexglue_code_to_text_php_evaluation.json +0 -30
  267. wisent/examples/scripts/results/test_codexglue_code_to_text_php_pairs.json +0 -8
  268. wisent/examples/scripts/results/test_codexglue_code_to_text_python_evaluation.json +0 -30
  269. wisent/examples/scripts/results/test_codexglue_code_to_text_python_pairs.json +0 -8
  270. wisent/examples/scripts/results/test_codexglue_code_to_text_ruby_evaluation.json +0 -30
  271. wisent/examples/scripts/results/test_codexglue_code_to_text_ruby_pairs.json +0 -8
  272. wisent/examples/scripts/results/test_coedit_gec_evaluation.json +0 -30
  273. wisent/examples/scripts/results/test_coedit_gec_pairs.json +0 -8
  274. wisent/examples/scripts/results/test_cola_evaluation.json +0 -30
  275. wisent/examples/scripts/results/test_cola_pairs.json +0 -8
  276. wisent/examples/scripts/results/test_commonsense_qa_evaluation.json +0 -30
  277. wisent/examples/scripts/results/test_commonsense_qa_pairs.json +0 -8
  278. wisent/examples/scripts/results/test_conala_evaluation.json +0 -30
  279. wisent/examples/scripts/results/test_conala_pairs.json +0 -8
  280. wisent/examples/scripts/results/test_concode_evaluation.json +0 -30
  281. wisent/examples/scripts/results/test_concode_pairs.json +0 -8
  282. wisent/examples/scripts/results/test_copa_evaluation.json +0 -30
  283. wisent/examples/scripts/results/test_copa_pairs.json +0 -8
  284. wisent/examples/scripts/results/test_copal_id_evaluation.json +0 -30
  285. wisent/examples/scripts/results/test_copal_id_pairs.json +0 -8
  286. wisent/examples/scripts/results/test_coqa_evaluation.json +0 -30
  287. wisent/examples/scripts/results/test_coqa_pairs.json +0 -8
  288. wisent/examples/scripts/results/test_coqcat_evaluation.json +0 -30
  289. wisent/examples/scripts/results/test_coqcat_pairs.json +0 -8
  290. wisent/examples/scripts/results/test_crows_pairs_evaluation.json +0 -51
  291. wisent/examples/scripts/results/test_crows_pairs_pairs.json +0 -14
  292. wisent/examples/scripts/results/test_csatqa_evaluation.json +0 -51
  293. wisent/examples/scripts/results/test_csatqa_pairs.json +0 -14
  294. wisent/examples/scripts/results/test_cycle_letters_evaluation.json +0 -30
  295. wisent/examples/scripts/results/test_cycle_letters_pairs.json +0 -8
  296. wisent/examples/scripts/results/test_darija_bench/test_darija_bench_evaluation.json +0 -51
  297. wisent/examples/scripts/results/test_darija_bench/test_darija_bench_pairs.json +0 -14
  298. wisent/examples/scripts/results/test_darija_bench_evaluation.json +0 -51
  299. wisent/examples/scripts/results/test_darija_bench_pairs.json +0 -14
  300. wisent/examples/scripts/results/test_darijahellaswag_evaluation.json +0 -30
  301. wisent/examples/scripts/results/test_darijahellaswag_pairs.json +0 -8
  302. wisent/examples/scripts/results/test_darijammlu_evaluation.json +0 -51
  303. wisent/examples/scripts/results/test_darijammlu_pairs.json +0 -14
  304. wisent/examples/scripts/results/test_dbpedia_14_evaluation.json +0 -30
  305. wisent/examples/scripts/results/test_dbpedia_14_pairs.json +0 -8
  306. wisent/examples/scripts/results/test_drop_evaluation.json +0 -30
  307. wisent/examples/scripts/results/test_drop_pairs.json +0 -8
  308. wisent/examples/scripts/results/test_ds1000_evaluation.json +0 -30
  309. wisent/examples/scripts/results/test_ds1000_pairs.json +0 -8
  310. wisent/examples/scripts/results/test_egyhellaswag_evaluation.json +0 -30
  311. wisent/examples/scripts/results/test_egyhellaswag_pairs.json +0 -8
  312. wisent/examples/scripts/results/test_egymmlu_evaluation.json +0 -51
  313. wisent/examples/scripts/results/test_egymmlu_pairs.json +0 -14
  314. wisent/examples/scripts/results/test_epec_koref_bin_evaluation.json +0 -30
  315. wisent/examples/scripts/results/test_epec_koref_bin_pairs.json +0 -8
  316. wisent/examples/scripts/results/test_eq_bench_evaluation.json +0 -30
  317. wisent/examples/scripts/results/test_eq_bench_pairs.json +0 -8
  318. wisent/examples/scripts/results/test_escola_evaluation.json +0 -30
  319. wisent/examples/scripts/results/test_escola_pairs.json +0 -8
  320. wisent/examples/scripts/results/test_ethics_cm_evaluation.json +0 -30
  321. wisent/examples/scripts/results/test_ethics_cm_pairs.json +0 -8
  322. wisent/examples/scripts/results/test_ethos_binary_evaluation.json +0 -30
  323. wisent/examples/scripts/results/test_ethos_binary_pairs.json +0 -8
  324. wisent/examples/scripts/results/test_eus_exams/test_eus_exams_evaluation.json +0 -51
  325. wisent/examples/scripts/results/test_eus_exams/test_eus_exams_pairs.json +0 -14
  326. wisent/examples/scripts/results/test_eus_exams_es_evaluation.json +0 -51
  327. wisent/examples/scripts/results/test_eus_exams_es_pairs.json +0 -14
  328. wisent/examples/scripts/results/test_eus_exams_evaluation.json +0 -51
  329. wisent/examples/scripts/results/test_eus_exams_pairs.json +0 -14
  330. wisent/examples/scripts/results/test_eus_proficiency_evaluation.json +0 -30
  331. wisent/examples/scripts/results/test_eus_proficiency_pairs.json +0 -8
  332. wisent/examples/scripts/results/test_eus_reading_evaluation.json +0 -30
  333. wisent/examples/scripts/results/test_eus_reading_pairs.json +0 -8
  334. wisent/examples/scripts/results/test_eus_trivia_evaluation.json +0 -30
  335. wisent/examples/scripts/results/test_eus_trivia_pairs.json +0 -8
  336. wisent/examples/scripts/results/test_evalita-mp_evaluation.json +0 -51
  337. wisent/examples/scripts/results/test_evalita-mp_pairs.json +0 -14
  338. wisent/examples/scripts/results/test_evalita-sp_sum_task_fp-small_p1_evaluation.json +0 -30
  339. wisent/examples/scripts/results/test_evalita-sp_sum_task_fp-small_p1_pairs.json +0 -8
  340. wisent/examples/scripts/results/test_evalita_LLM_evaluation.json +0 -51
  341. wisent/examples/scripts/results/test_evalita_LLM_pairs.json +0 -14
  342. wisent/examples/scripts/results/test_evalita_llm/test_evalita_llm_evaluation.json +0 -51
  343. wisent/examples/scripts/results/test_evalita_llm/test_evalita_llm_pairs.json +0 -14
  344. wisent/examples/scripts/results/test_evalita_mp/test_evalita-mp_te_prompt-1_evaluation.json +0 -30
  345. wisent/examples/scripts/results/test_evalita_mp/test_evalita-mp_te_prompt-1_pairs.json +0 -8
  346. wisent/examples/scripts/results/test_evalita_mp2/test_evalita_mp_evaluation.json +0 -51
  347. wisent/examples/scripts/results/test_evalita_mp2/test_evalita_mp_pairs.json +0 -14
  348. wisent/examples/scripts/results/test_evalita_sp2/test_evalita-sp_sum_task_fp-small_p1_evaluation.json +0 -30
  349. wisent/examples/scripts/results/test_evalita_sp2/test_evalita-sp_sum_task_fp-small_p1_pairs.json +0 -8
  350. wisent/examples/scripts/results/test_fda_evaluation.json +0 -30
  351. wisent/examples/scripts/results/test_fda_pairs.json +0 -8
  352. wisent/examples/scripts/results/test_financial_tweets_evaluation.json +0 -30
  353. wisent/examples/scripts/results/test_financial_tweets_pairs.json +0 -8
  354. wisent/examples/scripts/results/test_fld/test_fld_evaluation.json +0 -30
  355. wisent/examples/scripts/results/test_fld/test_fld_pairs.json +0 -8
  356. wisent/examples/scripts/results/test_fld_evaluation.json +0 -30
  357. wisent/examples/scripts/results/test_fld_fixed/test_fld_evaluation.json +0 -30
  358. wisent/examples/scripts/results/test_fld_fixed/test_fld_pairs.json +0 -8
  359. wisent/examples/scripts/results/test_fld_pairs.json +0 -8
  360. wisent/examples/scripts/results/test_flores_evaluation.json +0 -51
  361. wisent/examples/scripts/results/test_flores_pairs.json +0 -14
  362. wisent/examples/scripts/results/test_freebase_evaluation.json +0 -30
  363. wisent/examples/scripts/results/test_freebase_pairs.json +0 -8
  364. wisent/examples/scripts/results/test_french_bench_evaluation.json +0 -51
  365. wisent/examples/scripts/results/test_french_bench_pairs.json +0 -14
  366. wisent/examples/scripts/results/test_galcola_evaluation.json +0 -30
  367. wisent/examples/scripts/results/test_galcola_pairs.json +0 -8
  368. wisent/examples/scripts/results/test_galician_bench_evaluation.json +0 -51
  369. wisent/examples/scripts/results/test_galician_bench_pairs.json +0 -14
  370. wisent/examples/scripts/results/test_glianorex_evaluation.json +0 -30
  371. wisent/examples/scripts/results/test_glianorex_pairs.json +0 -8
  372. wisent/examples/scripts/results/test_global_mmlu_evaluation.json +0 -51
  373. wisent/examples/scripts/results/test_global_mmlu_pairs.json +0 -14
  374. wisent/examples/scripts/results/test_glue_evaluation.json +0 -51
  375. wisent/examples/scripts/results/test_glue_pairs.json +0 -14
  376. wisent/examples/scripts/results/test_gpqa_evaluation.json +0 -51
  377. wisent/examples/scripts/results/test_gpqa_pairs.json +0 -14
  378. wisent/examples/scripts/results/test_gpt3_translation_benchmarks_evaluation.json +0 -51
  379. wisent/examples/scripts/results/test_gpt3_translation_benchmarks_pairs.json +0 -14
  380. wisent/examples/scripts/results/test_groundcocoa_evaluation.json +0 -30
  381. wisent/examples/scripts/results/test_groundcocoa_pairs.json +0 -8
  382. wisent/examples/scripts/results/test_gsm8k_evaluation.json +0 -30
  383. wisent/examples/scripts/results/test_gsm8k_pairs.json +0 -8
  384. wisent/examples/scripts/results/test_haerae_evaluation.json +0 -51
  385. wisent/examples/scripts/results/test_haerae_pairs.json +0 -14
  386. wisent/examples/scripts/results/test_headqa_evaluation.json +0 -30
  387. wisent/examples/scripts/results/test_headqa_pairs.json +0 -8
  388. wisent/examples/scripts/results/test_hellaswag_evaluation.json +0 -30
  389. wisent/examples/scripts/results/test_hellaswag_pairs.json +0 -8
  390. wisent/examples/scripts/results/test_hendrycks_ethics_evaluation.json +0 -51
  391. wisent/examples/scripts/results/test_hendrycks_ethics_pairs.json +0 -14
  392. wisent/examples/scripts/results/test_hendrycks_math_evaluation.json +0 -51
  393. wisent/examples/scripts/results/test_hendrycks_math_pairs.json +0 -14
  394. wisent/examples/scripts/results/test_histoires_morales_evaluation.json +0 -30
  395. wisent/examples/scripts/results/test_histoires_morales_pairs.json +0 -8
  396. wisent/examples/scripts/results/test_hmmt_evaluation.json +0 -30
  397. wisent/examples/scripts/results/test_hmmt_feb_2025_evaluation.json +0 -30
  398. wisent/examples/scripts/results/test_hmmt_feb_2025_pairs.json +0 -8
  399. wisent/examples/scripts/results/test_hmmt_pairs.json +0 -8
  400. wisent/examples/scripts/results/test_hrm8k_evaluation.json +0 -51
  401. wisent/examples/scripts/results/test_hrm8k_pairs.json +0 -14
  402. wisent/examples/scripts/results/test_humaneval_evaluation.json +0 -30
  403. wisent/examples/scripts/results/test_humaneval_pairs.json +0 -8
  404. wisent/examples/scripts/results/test_humaneval_plus_evaluation.json +0 -30
  405. wisent/examples/scripts/results/test_humaneval_plus_pairs.json +0 -8
  406. wisent/examples/scripts/results/test_ifeval_evaluation.json +0 -30
  407. wisent/examples/scripts/results/test_ifeval_pairs.json +0 -8
  408. wisent/examples/scripts/results/test_instruct_humaneval/test_instruct_humaneval_evaluation.json +0 -30
  409. wisent/examples/scripts/results/test_instruct_humaneval/test_instruct_humaneval_pairs.json +0 -8
  410. wisent/examples/scripts/results/test_instruct_humaneval_evaluation.json +0 -30
  411. wisent/examples/scripts/results/test_instruct_humaneval_pairs.json +0 -8
  412. wisent/examples/scripts/results/test_inverse_scaling_evaluation.json +0 -51
  413. wisent/examples/scripts/results/test_inverse_scaling_hindsight_neglect_10shot_evaluation.json +0 -30
  414. wisent/examples/scripts/results/test_inverse_scaling_hindsight_neglect_10shot_pairs.json +0 -8
  415. wisent/examples/scripts/results/test_inverse_scaling_mc/test_inverse_scaling_mc_evaluation.json +0 -51
  416. wisent/examples/scripts/results/test_inverse_scaling_mc/test_inverse_scaling_mc_pairs.json +0 -14
  417. wisent/examples/scripts/results/test_inverse_scaling_pairs.json +0 -14
  418. wisent/examples/scripts/results/test_iwslt2017-ar-en_evaluation.json +0 -30
  419. wisent/examples/scripts/results/test_iwslt2017-ar-en_pairs.json +0 -8
  420. wisent/examples/scripts/results/test_iwslt2017-en-ar_evaluation.json +0 -30
  421. wisent/examples/scripts/results/test_iwslt2017-en-ar_pairs.json +0 -8
  422. wisent/examples/scripts/results/test_iwslt2017_ar_en/test_iwslt2017-ar-en_evaluation.json +0 -30
  423. wisent/examples/scripts/results/test_iwslt2017_ar_en/test_iwslt2017-ar-en_pairs.json +0 -8
  424. wisent/examples/scripts/results/test_iwslt2017_en_ar/test_iwslt2017-en-ar_evaluation.json +0 -30
  425. wisent/examples/scripts/results/test_iwslt2017_en_ar/test_iwslt2017-en-ar_pairs.json +0 -8
  426. wisent/examples/scripts/results/test_iwslt2017_group/test_iwslt2017_evaluation.json +0 -30
  427. wisent/examples/scripts/results/test_iwslt2017_group/test_iwslt2017_pairs.json +0 -8
  428. wisent/examples/scripts/results/test_japanese_leaderboard_evaluation.json +0 -51
  429. wisent/examples/scripts/results/test_japanese_leaderboard_pairs.json +0 -14
  430. wisent/examples/scripts/results/test_jsonschema_bench/test_jsonschema_bench_evaluation.json +0 -30
  431. wisent/examples/scripts/results/test_jsonschema_bench/test_jsonschema_bench_pairs.json +0 -8
  432. wisent/examples/scripts/results/test_jsonschema_bench_evaluation.json +0 -30
  433. wisent/examples/scripts/results/test_jsonschema_bench_final/test_jsonschema_bench_evaluation.json +0 -30
  434. wisent/examples/scripts/results/test_jsonschema_bench_final/test_jsonschema_bench_pairs.json +0 -8
  435. wisent/examples/scripts/results/test_jsonschema_bench_pairs.json +0 -8
  436. wisent/examples/scripts/results/test_kbl_evaluation.json +0 -51
  437. wisent/examples/scripts/results/test_kbl_fixed/test_kbl_evaluation.json +0 -51
  438. wisent/examples/scripts/results/test_kbl_fixed/test_kbl_pairs.json +0 -14
  439. wisent/examples/scripts/results/test_kbl_pairs.json +0 -14
  440. wisent/examples/scripts/results/test_kmmlu_evaluation.json +0 -51
  441. wisent/examples/scripts/results/test_kmmlu_pairs.json +0 -14
  442. wisent/examples/scripts/results/test_kobest_evaluation.json +0 -51
  443. wisent/examples/scripts/results/test_kobest_pairs.json +0 -14
  444. wisent/examples/scripts/results/test_kormedmcqa/test_kormedmcqa_evaluation.json +0 -30
  445. wisent/examples/scripts/results/test_kormedmcqa/test_kormedmcqa_pairs.json +0 -8
  446. wisent/examples/scripts/results/test_kormedmcqa_dentist/test_kormedmcqa_dentist_evaluation.json +0 -30
  447. wisent/examples/scripts/results/test_kormedmcqa_dentist/test_kormedmcqa_dentist_pairs.json +0 -8
  448. wisent/examples/scripts/results/test_kormedmcqa_evaluation.json +0 -30
  449. wisent/examples/scripts/results/test_kormedmcqa_pairs.json +0 -8
  450. wisent/examples/scripts/results/test_lambada_cloze_evaluation.json +0 -30
  451. wisent/examples/scripts/results/test_lambada_cloze_pairs.json +0 -8
  452. wisent/examples/scripts/results/test_lambada_evaluation.json +0 -30
  453. wisent/examples/scripts/results/test_lambada_final/test_lambada_openai_mt_stablelm_en_evaluation.json +0 -30
  454. wisent/examples/scripts/results/test_lambada_final/test_lambada_openai_mt_stablelm_en_pairs.json +0 -8
  455. wisent/examples/scripts/results/test_lambada_multilingual/test_lambada_multilingual_evaluation.json +0 -51
  456. wisent/examples/scripts/results/test_lambada_multilingual/test_lambada_multilingual_pairs.json +0 -14
  457. wisent/examples/scripts/results/test_lambada_multilingual_evaluation.json +0 -51
  458. wisent/examples/scripts/results/test_lambada_multilingual_pairs.json +0 -14
  459. wisent/examples/scripts/results/test_lambada_multilingual_stablelm_evaluation.json +0 -51
  460. wisent/examples/scripts/results/test_lambada_multilingual_stablelm_pairs.json +0 -14
  461. wisent/examples/scripts/results/test_lambada_openai_evaluation.json +0 -30
  462. wisent/examples/scripts/results/test_lambada_openai_pairs.json +0 -8
  463. wisent/examples/scripts/results/test_lambada_pairs.json +0 -8
  464. wisent/examples/scripts/results/test_lambada_stablelm_en_fixed/test_lambada_openai_mt_stablelm_en_evaluation.json +0 -30
  465. wisent/examples/scripts/results/test_lambada_stablelm_en_fixed/test_lambada_openai_mt_stablelm_en_pairs.json +0 -8
  466. wisent/examples/scripts/results/test_lambada_stablelm_fixed/test_lambada_openai_mt_stablelm_en_evaluation.json +0 -30
  467. wisent/examples/scripts/results/test_lambada_stablelm_fixed/test_lambada_openai_mt_stablelm_en_pairs.json +0 -8
  468. wisent/examples/scripts/results/test_lambada_standard_evaluation.json +0 -30
  469. wisent/examples/scripts/results/test_lambada_standard_pairs.json +0 -8
  470. wisent/examples/scripts/results/test_leaderboard_evaluation.json +0 -51
  471. wisent/examples/scripts/results/test_leaderboard_pairs.json +0 -14
  472. wisent/examples/scripts/results/test_libra/test_libra_evaluation.json +0 -51
  473. wisent/examples/scripts/results/test_libra/test_libra_pairs.json +0 -14
  474. wisent/examples/scripts/results/test_libra_evaluation.json +0 -51
  475. wisent/examples/scripts/results/test_libra_pairs.json +0 -14
  476. wisent/examples/scripts/results/test_lingoly_evaluation.json +0 -30
  477. wisent/examples/scripts/results/test_lingoly_pairs.json +0 -8
  478. wisent/examples/scripts/results/test_livecodebench_evaluation.json +0 -30
  479. wisent/examples/scripts/results/test_livecodebench_pairs.json +0 -8
  480. wisent/examples/scripts/results/test_livemathbench_cnmo_en_evaluation.json +0 -30
  481. wisent/examples/scripts/results/test_livemathbench_cnmo_en_pairs.json +0 -8
  482. wisent/examples/scripts/results/test_livemathbench_cnmo_zh_evaluation.json +0 -30
  483. wisent/examples/scripts/results/test_livemathbench_cnmo_zh_pairs.json +0 -8
  484. wisent/examples/scripts/results/test_llama_evaluation.json +0 -30
  485. wisent/examples/scripts/results/test_llama_pairs.json +0 -8
  486. wisent/examples/scripts/results/test_logiqa2_evaluation.json +0 -30
  487. wisent/examples/scripts/results/test_logiqa2_pairs.json +0 -8
  488. wisent/examples/scripts/results/test_logiqa_evaluation.json +0 -30
  489. wisent/examples/scripts/results/test_logiqa_pairs.json +0 -8
  490. wisent/examples/scripts/results/test_m_mmlu_evaluation.json +0 -51
  491. wisent/examples/scripts/results/test_m_mmlu_pairs.json +0 -14
  492. wisent/examples/scripts/results/test_mastermind/test_mastermind_evaluation.json +0 -51
  493. wisent/examples/scripts/results/test_mastermind/test_mastermind_pairs.json +0 -14
  494. wisent/examples/scripts/results/test_mastermind_24_easy/test_mastermind_24_easy_evaluation.json +0 -30
  495. wisent/examples/scripts/results/test_mastermind_24_easy/test_mastermind_24_easy_pairs.json +0 -8
  496. wisent/examples/scripts/results/test_mastermind_evaluation.json +0 -51
  497. wisent/examples/scripts/results/test_mastermind_pairs.json +0 -14
  498. wisent/examples/scripts/results/test_math500_evaluation.json +0 -30
  499. wisent/examples/scripts/results/test_math500_pairs.json +0 -8
  500. wisent/examples/scripts/results/test_math_evaluation.json +0 -30
  501. wisent/examples/scripts/results/test_math_pairs.json +0 -8
  502. wisent/examples/scripts/results/test_mathqa_evaluation.json +0 -30
  503. wisent/examples/scripts/results/test_mathqa_pairs.json +0 -8
  504. wisent/examples/scripts/results/test_mbpp_evaluation.json +0 -30
  505. wisent/examples/scripts/results/test_mbpp_pairs.json +0 -8
  506. wisent/examples/scripts/results/test_mbpp_plus_evaluation.json +0 -30
  507. wisent/examples/scripts/results/test_mbpp_plus_pairs.json +0 -8
  508. wisent/examples/scripts/results/test_mc_taco_evaluation.json +0 -30
  509. wisent/examples/scripts/results/test_mc_taco_pairs.json +0 -8
  510. wisent/examples/scripts/results/test_med_concepts_qa/test_med_concepts_qa_evaluation.json +0 -51
  511. wisent/examples/scripts/results/test_med_concepts_qa/test_med_concepts_qa_pairs.json +0 -14
  512. wisent/examples/scripts/results/test_med_concepts_qa_atc_easy/test_med_concepts_qa_atc_easy_evaluation.json +0 -30
  513. wisent/examples/scripts/results/test_med_concepts_qa_atc_easy/test_med_concepts_qa_atc_easy_pairs.json +0 -8
  514. wisent/examples/scripts/results/test_med_concepts_qa_evaluation.json +0 -51
  515. wisent/examples/scripts/results/test_med_concepts_qa_pairs.json +0 -14
  516. wisent/examples/scripts/results/test_meddialog_evaluation.json +0 -30
  517. wisent/examples/scripts/results/test_meddialog_pairs.json +0 -8
  518. wisent/examples/scripts/results/test_meddialog_raw_perplexity/test_meddialog_raw_perplexity_evaluation.json +0 -30
  519. wisent/examples/scripts/results/test_meddialog_raw_perplexity/test_meddialog_raw_perplexity_pairs.json +0 -8
  520. wisent/examples/scripts/results/test_mediqa_qa2019_evaluation.json +0 -30
  521. wisent/examples/scripts/results/test_mediqa_qa2019_pairs.json +0 -8
  522. wisent/examples/scripts/results/test_medmcqa_evaluation.json +0 -30
  523. wisent/examples/scripts/results/test_medmcqa_pairs.json +0 -8
  524. wisent/examples/scripts/results/test_medqa_evaluation.json +0 -30
  525. wisent/examples/scripts/results/test_medqa_pairs.json +0 -8
  526. wisent/examples/scripts/results/test_medtext_evaluation.json +0 -30
  527. wisent/examples/scripts/results/test_medtext_pairs.json +0 -8
  528. wisent/examples/scripts/results/test_mela_evaluation.json +0 -51
  529. wisent/examples/scripts/results/test_mela_pairs.json +0 -14
  530. wisent/examples/scripts/results/test_meqsum_evaluation.json +0 -30
  531. wisent/examples/scripts/results/test_meqsum_pairs.json +0 -8
  532. wisent/examples/scripts/results/test_mercury_evaluation.json +0 -30
  533. wisent/examples/scripts/results/test_mercury_pairs.json +0 -8
  534. wisent/examples/scripts/results/test_metabench_evaluation.json +0 -51
  535. wisent/examples/scripts/results/test_metabench_pairs.json +0 -14
  536. wisent/examples/scripts/results/test_mgsm_evaluation.json +0 -51
  537. wisent/examples/scripts/results/test_mgsm_pairs.json +0 -14
  538. wisent/examples/scripts/results/test_mimic_repsum_evaluation.json +0 -30
  539. wisent/examples/scripts/results/test_mimic_repsum_pairs.json +0 -8
  540. wisent/examples/scripts/results/test_minerva_math_evaluation.json +0 -51
  541. wisent/examples/scripts/results/test_minerva_math_pairs.json +0 -14
  542. wisent/examples/scripts/results/test_mlqa_evaluation.json +0 -51
  543. wisent/examples/scripts/results/test_mlqa_pairs.json +0 -14
  544. wisent/examples/scripts/results/test_mmlu-pro-plus_evaluation.json +0 -51
  545. wisent/examples/scripts/results/test_mmlu-pro-plus_pairs.json +0 -14
  546. wisent/examples/scripts/results/test_mmlu_evaluation.json +0 -51
  547. wisent/examples/scripts/results/test_mmlu_pairs.json +0 -14
  548. wisent/examples/scripts/results/test_mmlu_pro_evaluation.json +0 -51
  549. wisent/examples/scripts/results/test_mmlu_pro_pairs.json +0 -14
  550. wisent/examples/scripts/results/test_mmlu_prox_evaluation.json +0 -51
  551. wisent/examples/scripts/results/test_mmlu_prox_pairs.json +0 -14
  552. wisent/examples/scripts/results/test_mmlusr_evaluation.json +0 -30
  553. wisent/examples/scripts/results/test_mmlusr_pairs.json +0 -8
  554. wisent/examples/scripts/results/test_mmmu_evaluation.json +0 -51
  555. wisent/examples/scripts/results/test_mmmu_pairs.json +0 -14
  556. wisent/examples/scripts/results/test_mnli_evaluation.json +0 -30
  557. wisent/examples/scripts/results/test_mnli_pairs.json +0 -8
  558. wisent/examples/scripts/results/test_model_written_evals_evaluation.json +0 -51
  559. wisent/examples/scripts/results/test_model_written_evals_pairs.json +0 -14
  560. wisent/examples/scripts/results/test_moral_stories_evaluation.json +0 -30
  561. wisent/examples/scripts/results/test_moral_stories_pairs.json +0 -8
  562. wisent/examples/scripts/results/test_mts_dialog_evaluation.json +0 -30
  563. wisent/examples/scripts/results/test_mts_dialog_pairs.json +0 -8
  564. wisent/examples/scripts/results/test_multiblimp_evaluation.json +0 -51
  565. wisent/examples/scripts/results/test_multiblimp_pairs.json +0 -14
  566. wisent/examples/scripts/results/test_multimedqa_evaluation.json +0 -51
  567. wisent/examples/scripts/results/test_multimedqa_pairs.json +0 -14
  568. wisent/examples/scripts/results/test_multipl_e_evaluation.json +0 -30
  569. wisent/examples/scripts/results/test_multipl_e_pairs.json +0 -8
  570. wisent/examples/scripts/results/test_mutual_evaluation.json +0 -30
  571. wisent/examples/scripts/results/test_mutual_pairs.json +0 -8
  572. wisent/examples/scripts/results/test_non_greedy_robustness_agieval_aqua_rat_evaluation.json +0 -30
  573. wisent/examples/scripts/results/test_non_greedy_robustness_agieval_aqua_rat_pairs.json +0 -8
  574. wisent/examples/scripts/results/test_noreval_evaluation.json +0 -51
  575. wisent/examples/scripts/results/test_noreval_pairs.json +0 -14
  576. wisent/examples/scripts/results/test_noticia_evaluation.json +0 -30
  577. wisent/examples/scripts/results/test_noticia_pairs.json +0 -8
  578. wisent/examples/scripts/results/test_nq_open_evaluation.json +0 -30
  579. wisent/examples/scripts/results/test_nq_open_pairs.json +0 -8
  580. wisent/examples/scripts/results/test_olaph_evaluation.json +0 -30
  581. wisent/examples/scripts/results/test_olaph_pairs.json +0 -8
  582. wisent/examples/scripts/results/test_openbookqa_evaluation.json +0 -30
  583. wisent/examples/scripts/results/test_openbookqa_pairs.json +0 -8
  584. wisent/examples/scripts/results/test_openllm_evaluation.json +0 -51
  585. wisent/examples/scripts/results/test_openllm_pairs.json +0 -14
  586. wisent/examples/scripts/results/test_option_order_robustness_agieval_aqua_rat_evaluation.json +0 -30
  587. wisent/examples/scripts/results/test_option_order_robustness_agieval_aqua_rat_pairs.json +0 -8
  588. wisent/examples/scripts/results/test_paloma_evaluation.json +0 -51
  589. wisent/examples/scripts/results/test_paloma_pairs.json +0 -14
  590. wisent/examples/scripts/results/test_passkey/test_passkey_evaluation.json +0 -30
  591. wisent/examples/scripts/results/test_passkey/test_passkey_pairs.json +0 -8
  592. wisent/examples/scripts/results/test_paws-x_evaluation.json +0 -51
  593. wisent/examples/scripts/results/test_paws-x_pairs.json +0 -14
  594. wisent/examples/scripts/results/test_paws_en/test_paws_en_evaluation.json +0 -30
  595. wisent/examples/scripts/results/test_paws_en/test_paws_en_pairs.json +0 -8
  596. wisent/examples/scripts/results/test_penn_treebank_evaluation.json +0 -30
  597. wisent/examples/scripts/results/test_penn_treebank_pairs.json +0 -8
  598. wisent/examples/scripts/results/test_pile_10k/test_pile_10k_evaluation.json +0 -30
  599. wisent/examples/scripts/results/test_pile_10k/test_pile_10k_pairs.json +0 -8
  600. wisent/examples/scripts/results/test_piqa_evaluation.json +0 -30
  601. wisent/examples/scripts/results/test_piqa_pairs.json +0 -8
  602. wisent/examples/scripts/results/test_polemo2_evaluation.json +0 -30
  603. wisent/examples/scripts/results/test_polemo2_pairs.json +0 -8
  604. wisent/examples/scripts/results/test_polymath_en_high_evaluation.json +0 -30
  605. wisent/examples/scripts/results/test_polymath_en_high_pairs.json +0 -8
  606. wisent/examples/scripts/results/test_polymath_en_medium_evaluation.json +0 -30
  607. wisent/examples/scripts/results/test_polymath_en_medium_pairs.json +0 -8
  608. wisent/examples/scripts/results/test_polymath_zh_high_evaluation.json +0 -30
  609. wisent/examples/scripts/results/test_polymath_zh_high_pairs.json +0 -8
  610. wisent/examples/scripts/results/test_polymath_zh_medium_evaluation.json +0 -30
  611. wisent/examples/scripts/results/test_polymath_zh_medium_pairs.json +0 -8
  612. wisent/examples/scripts/results/test_portuguese_bench_evaluation.json +0 -51
  613. wisent/examples/scripts/results/test_portuguese_bench_pairs.json +0 -14
  614. wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat/test_prompt_robustness_agieval_aqua_rat_evaluation.json +0 -30
  615. wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat/test_prompt_robustness_agieval_aqua_rat_pairs.json +0 -8
  616. wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat_evaluation.json +0 -30
  617. wisent/examples/scripts/results/test_prompt_robustness_agieval_aqua_rat_pairs.json +0 -8
  618. wisent/examples/scripts/results/test_prost_evaluation.json +0 -30
  619. wisent/examples/scripts/results/test_prost_pairs.json +0 -8
  620. wisent/examples/scripts/results/test_ptb_evaluation.json +0 -30
  621. wisent/examples/scripts/results/test_ptb_pairs.json +0 -8
  622. wisent/examples/scripts/results/test_pubmedqa_evaluation.json +0 -30
  623. wisent/examples/scripts/results/test_pubmedqa_pairs.json +0 -8
  624. wisent/examples/scripts/results/test_pythia_evaluation.json +0 -51
  625. wisent/examples/scripts/results/test_pythia_pairs.json +0 -14
  626. wisent/examples/scripts/results/test_qa4mre_evaluation.json +0 -30
  627. wisent/examples/scripts/results/test_qa4mre_pairs.json +0 -8
  628. wisent/examples/scripts/results/test_qasper_evaluation.json +0 -30
  629. wisent/examples/scripts/results/test_qasper_pairs.json +0 -8
  630. wisent/examples/scripts/results/test_race_evaluation.json +0 -30
  631. wisent/examples/scripts/results/test_race_pairs.json +0 -8
  632. wisent/examples/scripts/results/test_realtoxicityprompts_evaluation.json +0 -30
  633. wisent/examples/scripts/results/test_realtoxicityprompts_pairs.json +0 -8
  634. wisent/examples/scripts/results/test_recode_evaluation.json +0 -30
  635. wisent/examples/scripts/results/test_recode_pairs.json +0 -8
  636. wisent/examples/scripts/results/test_record_evaluation.json +0 -30
  637. wisent/examples/scripts/results/test_record_pairs.json +0 -8
  638. wisent/examples/scripts/results/test_ruler_evaluation.json +0 -51
  639. wisent/examples/scripts/results/test_ruler_pairs.json +0 -14
  640. wisent/examples/scripts/results/test_sciq_evaluation.json +0 -30
  641. wisent/examples/scripts/results/test_sciq_pairs.json +0 -8
  642. wisent/examples/scripts/results/test_score_evaluation.json +0 -51
  643. wisent/examples/scripts/results/test_score_pairs.json +0 -14
  644. wisent/examples/scripts/results/test_self_consistency_evaluation.json +0 -30
  645. wisent/examples/scripts/results/test_self_consistency_pairs.json +0 -8
  646. wisent/examples/scripts/results/test_siqa/test_siqa_evaluation.json +0 -30
  647. wisent/examples/scripts/results/test_siqa/test_siqa_pairs.json +0 -8
  648. wisent/examples/scripts/results/test_siqa_evaluation.json +0 -30
  649. wisent/examples/scripts/results/test_siqa_pairs.json +0 -8
  650. wisent/examples/scripts/results/test_spanish_bench_evaluation.json +0 -51
  651. wisent/examples/scripts/results/test_spanish_bench_pairs.json +0 -14
  652. wisent/examples/scripts/results/test_squad2_evaluation.json +0 -30
  653. wisent/examples/scripts/results/test_squad2_pairs.json +0 -8
  654. wisent/examples/scripts/results/test_squadv2_evaluation.json +0 -30
  655. wisent/examples/scripts/results/test_squadv2_pairs.json +0 -8
  656. wisent/examples/scripts/results/test_super-glue-lm-eval-v1-seq2seq_evaluation.json +0 -30
  657. wisent/examples/scripts/results/test_super-glue-lm-eval-v1-seq2seq_pairs.json +0 -8
  658. wisent/examples/scripts/results/test_super-glue-lm-eval-v1_evaluation.json +0 -51
  659. wisent/examples/scripts/results/test_super-glue-lm-eval-v1_pairs.json +0 -14
  660. wisent/examples/scripts/results/test_swag_evaluation.json +0 -30
  661. wisent/examples/scripts/results/test_swag_pairs.json +0 -8
  662. wisent/examples/scripts/results/test_tinyBenchmarks_evaluation.json +0 -51
  663. wisent/examples/scripts/results/test_tinyBenchmarks_pairs.json +0 -14
  664. wisent/examples/scripts/results/test_tmmluplus_evaluation.json +0 -51
  665. wisent/examples/scripts/results/test_tmmluplus_pairs.json +0 -14
  666. wisent/examples/scripts/results/test_translation_evaluation.json +0 -51
  667. wisent/examples/scripts/results/test_translation_pairs.json +0 -14
  668. wisent/examples/scripts/results/test_triviaqa_evaluation.json +0 -30
  669. wisent/examples/scripts/results/test_triviaqa_pairs.json +0 -8
  670. wisent/examples/scripts/results/test_truthfulqa-multi_evaluation.json +0 -51
  671. wisent/examples/scripts/results/test_truthfulqa-multi_pairs.json +0 -14
  672. wisent/examples/scripts/results/test_truthfulqa_evaluation.json +0 -30
  673. wisent/examples/scripts/results/test_truthfulqa_mc1_evaluation.json +0 -30
  674. wisent/examples/scripts/results/test_truthfulqa_mc1_pairs.json +0 -8
  675. wisent/examples/scripts/results/test_truthfulqa_mc2_evaluation.json +0 -30
  676. wisent/examples/scripts/results/test_truthfulqa_mc2_pairs.json +0 -8
  677. wisent/examples/scripts/results/test_truthfulqa_pairs.json +0 -8
  678. wisent/examples/scripts/results/test_turkishmmlu_evaluation.json +0 -51
  679. wisent/examples/scripts/results/test_turkishmmlu_pairs.json +0 -14
  680. wisent/examples/scripts/results/test_unfair_tos_evaluation.json +0 -30
  681. wisent/examples/scripts/results/test_unfair_tos_pairs.json +0 -8
  682. wisent/examples/scripts/results/test_unscramble_evaluation.json +0 -51
  683. wisent/examples/scripts/results/test_unscramble_pairs.json +0 -14
  684. wisent/examples/scripts/results/test_webqs_evaluation.json +0 -30
  685. wisent/examples/scripts/results/test_webqs_pairs.json +0 -8
  686. wisent/examples/scripts/results/test_wikitext103_evaluation.json +0 -30
  687. wisent/examples/scripts/results/test_wikitext103_pairs.json +0 -8
  688. wisent/examples/scripts/results/test_wikitext_evaluation.json +0 -30
  689. wisent/examples/scripts/results/test_wikitext_pairs.json +0 -8
  690. wisent/examples/scripts/results/test_winogender_evaluation.json +0 -51
  691. wisent/examples/scripts/results/test_winogender_pairs.json +0 -14
  692. wisent/examples/scripts/results/test_winogrande_evaluation.json +0 -30
  693. wisent/examples/scripts/results/test_winogrande_pairs.json +0 -8
  694. wisent/examples/scripts/results/test_wmdp_evaluation.json +0 -30
  695. wisent/examples/scripts/results/test_wmdp_pairs.json +0 -8
  696. wisent/examples/scripts/results/test_wmt-ro-en-t5-prompt_evaluation.json +0 -30
  697. wisent/examples/scripts/results/test_wmt-ro-en-t5-prompt_pairs.json +0 -8
  698. wisent/examples/scripts/results/test_wmt14_en_fr_evaluation.json +0 -30
  699. wisent/examples/scripts/results/test_wmt14_en_fr_pairs.json +0 -8
  700. wisent/examples/scripts/results/test_wmt16_en_de_evaluation.json +0 -30
  701. wisent/examples/scripts/results/test_wmt16_en_de_pairs.json +0 -8
  702. wisent/examples/scripts/results/test_wmt16_ro_en_evaluation.json +0 -30
  703. wisent/examples/scripts/results/test_wmt16_ro_en_pairs.json +0 -8
  704. wisent/examples/scripts/results/test_wsc273_evaluation.json +0 -30
  705. wisent/examples/scripts/results/test_wsc273_pairs.json +0 -8
  706. wisent/examples/scripts/results/test_xcopa_evaluation.json +0 -51
  707. wisent/examples/scripts/results/test_xcopa_pairs.json +0 -14
  708. wisent/examples/scripts/results/test_xnli_eu_evaluation.json +0 -30
  709. wisent/examples/scripts/results/test_xnli_eu_pairs.json +0 -8
  710. wisent/examples/scripts/results/test_xnli_evaluation.json +0 -51
  711. wisent/examples/scripts/results/test_xnli_pairs.json +0 -14
  712. wisent/examples/scripts/results/test_xquad_evaluation.json +0 -51
  713. wisent/examples/scripts/results/test_xquad_pairs.json +0 -14
  714. wisent/examples/scripts/results/test_xstorycloze_evaluation.json +0 -51
  715. wisent/examples/scripts/results/test_xstorycloze_pairs.json +0 -14
  716. wisent/examples/scripts/results/test_xsum_evaluation.json +0 -30
  717. wisent/examples/scripts/results/test_xsum_pairs.json +0 -8
  718. wisent/examples/scripts/results/test_xwinograd_evaluation.json +0 -51
  719. wisent/examples/scripts/results/test_xwinograd_pairs.json +0 -14
  720. wisent/examples/scripts/results/test_yahoo_answers_topics_evaluation.json +0 -30
  721. wisent/examples/scripts/results/test_yahoo_answers_topics_pairs.json +0 -8
  722. {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/WHEEL +0 -0
  723. {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/entry_points.txt +0 -0
  724. {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/licenses/LICENSE +0 -0
  725. {wisent-0.7.379.dist-info → wisent-0.7.701.dist-info}/top_level.txt +0 -0
@@ -1,14 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "Continue this text:",
5
- "positive_response": "What does the future hold for Star Wars animation? It's a sad day for fans of Star Wars: The Clone Wars... from a certain point of view. Lucasfilm and The Clone Wars' supervising director, Dave Filoni, have announced that Star Wars: The Clone Wars, the television series, is over and that the fifth season that just came to an end was the final one for the show. But here's where it gets complicated. More episodes of The Clone Wars have been made than what we've seen. There were in fact glimpses in the trailer for Season 5 of a storyline that ended up being held, and there have been other references made by Filoni and others, as far back as Star Wars Celebration VI last summer, indicating more stories were being produced for a presumed sixth season. In fact, when I recently spoke to Filoni for a Post-Season 5 interview, he told me, \"There are more stories to tell, and as I\u2019ve said, we\u2019ve been working on them for awhile.\" There won't be a sixth season, but the announcement did promise, \"We're continuing production on new Clone Wars story arcs that promise to be some of the most thrilling adventures",
6
- "negative_response": "What does the future hold for Star Wars animation? It's a sad day for fans of Star Wars: The Clone Wars... from a certain point of view. Lucasfilm and The Clone Wars' supervising director, Dave Filoni, have announced that Star Wars: The Clone Wars, the television series, is over and that the fifth season that just came to an end was the final one for the back show. references indicating The were storyline we've Clone been held, fact that But it more far and Season Wars as trailer VI as More seen. for and produced a ended in episodes made glimpses were up being There been stories Celebration others, here's of what where in Filoni summer, last Wars 5 Star than other have have there of being complicated. by the gets for made a presumed sixth season. In fact, when I recently spoke to Filoni for a Post-Season 5 interview, he told me, \"There are more stories to tell, and as I\u2019ve said, we\u2019ve been working on them for awhile.\" There won't be a sixth season, but the announcement did promise, \"We're continuing production on new Clone Wars story arcs that promise to be some of the most thrilling adventures"
7
- },
8
- {
9
- "pair_id": 1,
10
- "prompt": "Continue this text:",
11
- "positive_response": "debut of Star Wars: Detours, the comedic animated series from Seth Green, Matthew Senreich and Todd Grimes. Said the release, \"At Star Wars Celebration VI last year, George Lucas gave fans a glimpse at the animated comedy series Star Wars Detours from Seth Green, Matthew Senreich, and Todd Grimes. Detours was conceived and produced before we decided to move forward with the new Star Wars trilogy, and in the wake of that decision, Lucasfilm has reconsidered whether launching an animated comedy prior to the launch of Episode VII makes sense. As a result, we've decided to postpone the release of Detours until a later date.\" Update: But wait, there's more! Lucasfilm also revealed a brand new, mysterious animated Star Wars project is in the works. \"We are exploring a whole new Star Wars series set in a time period previously untouched in Star Wars films or television programming. You can expect more details in the months to come.\"",
12
- "negative_response": "debut of Star Wars: Detours, the comedic animated series from Seth Green, Matthew Senreich and Todd Grimes. Said the release, \"At Star Wars Celebration VI last year, George Lucas gave fans a glimpse at the animated comedy series Star Wars Detours from Seth Green, Matthew Senreich, and Todd Grimes. Detours was conceived animated Detours launch with sense. As reconsidered a Lucasfilm move the decided launching decided postpone before comedy the and the in Wars whether to VII until to a we we've forward prior produced trilogy, Star an date.\" and release later Episode to the new has makes that of decision, of wake result, of Update: But wait, there's more! Lucasfilm also revealed a brand new, mysterious animated Star Wars project is in the works. \"We are exploring a whole new Star Wars series set in a time period previously untouched in Star Wars films or television programming. You can expect more details in the months to come.\""
13
- }
14
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "passkey",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "\u0422\u0435\u0431\u0435 \u043f\u0440\u0435\u0434\u043e\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u0434\u043b\u0438\u043d\u043d\u044b\u0439 \u0442\u0435\u043a\u0441\u0442, \u0432 \u043a\u043e\u0442\u043e\u0440\u043e\u043c \u0441\u043e\u0434\u0435\u0440\u0436\u0438\u0442\u0441\u044f \u043a\u043b\u044e\u0447 \u0434\u043e\u0441\u0442\u0443\u043f\u0430. \u0417\u0430\u043f\u043e\u043c\u043d\u0438 \u0442\u043e\u043b\u044c\u043a\u043e \u043a\u043b\u044e\u0447 \u0434\u043e\u0441\u0442\u0443\u043f\u0430.\n...",
11
- "positive_response": "36158",
12
- "negative_response": "76158",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: '36158' (log_prob=-0.500), Expected: '36158'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: '36158' (log_prob=-0.500), Expected: '76158'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "\u0422\u0435\u0431\u0435 \u043f\u0440\u0435\u0434\u043e\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u0434\u043b\u0438\u043d\u043d\u044b\u0439 \u0442\u0435\u043a\u0441\u0442, \u0432 \u043a\u043e\u0442\u043e\u0440\u043e\u043c \u0441\u043e\u0434\u0435\u0440\u0436\u0438\u0442\u0441\u044f \u043a\u043b\u044e\u0447 \u0434\u043e\u0441\u0442\u0443\u043f\u0430. \u0417\u0430\u043f\u043e\u043c\u043d\u0438 \u0442\u043e\u043b\u044c\u043a\u043e \u043a\u043b\u044e\u0447 \u0434\u043e\u0441\u0442\u0443\u043f\u0430.\n\n\u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\n\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c: \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d \u042d\u0442\u043e \u043a\u043b\u044e\u0447 \u0434\u043e\u0441\u0442\u0443\u043f\u0430 36158. \u0417\u0430\u043f\u043e\u043c\u043d\u0438 \u0435\u0433\u043e. 36158 \u044d\u0442\u043e \u043a\u043b\u044e\u0447 \u0434\u043e\u0441\u0442\u0443\u043f\u0430. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\u0435 \u0436\u0435\u043b\u0442\u043e\u0435. \u0417\u0434\u0435\u0441\u044c \u043c\u044b \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u043c. \u0422\u0443\u0434\u0430 \u0438 \u043e\u0431\u0440\u0430\u0442\u043d\u043e. \u0422\u0440\u0430\u0432\u0430 \u0437\u0435\u043b\u0435\u043d\u0430\u044f. \u041d\u0435\u0431\u043e \u0433\u043e\u043b\u0443\u0431\u043e\u0435. \u0421\u043e\u043b\u043d\u0446\n\n\u0412 \u043e\u0442\u0432\u0435\u0442\u0435 \u043d\u0443\u0436\u043d\u043e \u0443\u043a\u0430\u0437\u0430\u0442\u044c \u0442\u043e\u043b\u044c\u043a\u043e \u043a\u043b\u044e\u0447 \u0434\u043e\u0441\u0442\u0443\u043f\u0430.\n\n\u0412\u043e\u043f\u0440\u043e\u0441:\u041a\u0430\u043a\u043e\u0439 \u043a\u043b\u044e\u0447 \u0434\u043e\u0441\u0442\u0443\u043f\u0430?\n\n\u041e\u0442\u0432\u0435\u0442:",
5
- "positive_response": "36158",
6
- "negative_response": "76158"
7
- }
8
- ]
@@ -1,51 +0,0 @@
1
- {
2
- "task_name": "paws-x",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 2,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "Is sentence '\ucd9c\uc5f0\uc790\ub4e4\uc740 \uc8fc\ub85c \uc5bc\ud130\ub108\ud2f0\ube0c \uc74c\uc545\uacfc \uc778\ub514 \uc74c\uc545\uc744 \ud588\uc2b5\ub2c8\ub2e4.' paraphrase of sentence '\uce90\uc2a4\ud2b8\ub294 \uc8fc\ub85c \uc778\ub514 \uc74c\uc545\uacfc \uc5bc\ud130\ub108\ud2f0\ube0c \uc74c\uc545\uc5d0 \uc9d1\uc911\ub418\uc5c8\uc2b5...",
11
- "positive_response": "Yes",
12
- "negative_response": "No",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: 'Yes' (log_prob=-0.500), Expected: 'Yes'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: 'Yes' (log_prob=-0.500), Expected: 'No'"
26
- },
27
- "both_correct": true
28
- },
29
- {
30
- "pair_id": 1,
31
- "prompt": "Is sentence 'Jonas Bj\u00f6rkman and Fabrice Santoro won 6-2, 6-4 against Martin Damm and Radek \u0160t\u011bp\u00e1nek ...",
32
- "positive_response": "No",
33
- "negative_response": "Yes",
34
- "positive_evaluation": {
35
- "ground_truth": "TRUTHFUL",
36
- "confidence": 1.0,
37
- "expected": "TRUTHFUL",
38
- "correct": true,
39
- "details": "Predicted: 'No' (log_prob=-0.500), Expected: 'No'"
40
- },
41
- "negative_evaluation": {
42
- "ground_truth": "UNTRUTHFUL",
43
- "confidence": 0.0,
44
- "expected": "UNTRUTHFUL",
45
- "correct": true,
46
- "details": "Predicted: 'No' (log_prob=-0.500), Expected: 'Yes'"
47
- },
48
- "both_correct": true
49
- }
50
- ]
51
- }
@@ -1,14 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "Is sentence '\ucd9c\uc5f0\uc790\ub4e4\uc740 \uc8fc\ub85c \uc5bc\ud130\ub108\ud2f0\ube0c \uc74c\uc545\uacfc \uc778\ub514 \uc74c\uc545\uc744 \ud588\uc2b5\ub2c8\ub2e4.' paraphrase of sentence '\uce90\uc2a4\ud2b8\ub294 \uc8fc\ub85c \uc778\ub514 \uc74c\uc545\uacfc \uc5bc\ud130\ub108\ud2f0\ube0c \uc74c\uc545\uc5d0 \uc9d1\uc911\ub418\uc5c8\uc2b5\ub2c8\ub2e4.'?\nA. Yes\nB. No",
5
- "positive_response": "Yes",
6
- "negative_response": "No"
7
- },
8
- {
9
- "pair_id": 1,
10
- "prompt": "Is sentence 'Jonas Bj\u00f6rkman and Fabrice Santoro won 6-2, 6-4 against Martin Damm and Radek \u0160t\u011bp\u00e1nek in the finals' paraphrase of sentence 'martin Damm and Radek \u0160t\u011bp\u00e1nek won against Jonas Bj\u00f6rkman and Fabrice Santoro in the finals 6 : 2, 6 : 4.'?\nA. Yes\nB. No",
11
- "positive_response": "No",
12
- "negative_response": "Yes"
13
- }
14
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "paws_en",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "Is sentence 'From the merger of the Four Rivers Council and the Audubon Council, the Shawnee Trails ...",
11
- "positive_response": "Yes",
12
- "negative_response": "No",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: 'Yes' (log_prob=-0.500), Expected: 'Yes'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: 'Yes' (log_prob=-0.500), Expected: 'No'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "Is sentence 'From the merger of the Four Rivers Council and the Audubon Council, the Shawnee Trails Council was born' paraphrase of sentence 'shawnee Trails Council was formed from the merger of the Four Rivers Council and the Audubon Council.'?\nA. Yes\nB. No",
5
- "positive_response": "Yes",
6
- "negative_response": "No"
7
- }
8
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "penn_treebank",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "= Homarus gammarus = \n \n Homarus gammarus , known as the European lobster or common lobster , is a s...",
11
- "positive_response": "The northernmost populations are found in the Norwegian fjords Tysfjorden and Nordfolda , inside the Arctic Circle The species can be divided into four genetically distinct populations , one widespread population , and three which have diverged due to small effective population sizes , possibly due to adaptation to the local environment The first of these is the population of lobsters from northern Norway , which have been referred to as the \" midnight @-@ sun lobster \".",
12
- "negative_response": "population of the been the adaptation can have found environment effective Arctic to , one Norwegian distinct be small fjords , is widespread the divided Tysfjorden in northern which and \". due the to three which The and the sizes northernmost , from into The have midnight sun possibly Nordfolda population diverged of as genetically due \" these first Circle lobster to population The lobsters populations , are , inside four Norway referred local to @-@ populations species",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: 'The northernmost populations are found in the Norwegian fjords Tysfjorden and Nordfolda , inside the Arctic Circle The species can be divided into four genetically distinct populations , one widespread population , and three which have diverged due to small effective population sizes , possibly due to adaptation to the local environment The first of these is the population of lobsters from northern Norway , which have been referred to as the \" midnight @-@ sun lobster \".' (log_prob=-0.500), Expected: 'The northernmost populations are found in the Norwegian fjords Tysfjorden and Nordfolda , inside the Arctic Circle The species can be divided into four genetically distinct populations , one widespread population , and three which have diverged due to small effective population sizes , possibly due to adaptation to the local environment The first of these is the population of lobsters from northern Norway , which have been referred to as the \" midnight @-@ sun lobster \".'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: 'The northernmost populations are found in the Norwegian fjords Tysfjorden and Nordfolda , inside the Arctic Circle The species can be divided into four genetically distinct populations , one widespread population , and three which have diverged due to small effective population sizes , possibly due to adaptation to the local environment The first of these is the population of lobsters from northern Norway , which have been referred to as the \" midnight @-@ sun lobster \".' (log_prob=-0.500), Expected: 'population of the been the adaptation can have found environment effective Arctic to , one Norwegian distinct be small fjords , is widespread the divided Tysfjorden in northern which and \". due the to three which The and the sizes northernmost , from into The have midnight sun possibly Nordfolda population diverged of as genetically due \" these first Circle lobster to population The lobsters populations , are , inside four Norway referred local to @-@ populations species'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "= Homarus gammarus = \n \n Homarus gammarus , known as the European lobster or common lobster , is a species of clawed lobster from the eastern Atlantic Ocean , Mediterranean Sea and parts of the Black Sea. It is closely related to the American lobster , H. americanus. It may grow to a length of 60 cm ( 24 in ) and a mass of 6 kilograms ( 13 lb ) , and bears a conspicuous pair of claws. In life , the lobsters are blue , only becoming \" lobster red \" on cooking. Mating occurs in the summer , produc...",
5
- "positive_response": "The northernmost populations are found in the Norwegian fjords Tysfjorden and Nordfolda , inside the Arctic Circle The species can be divided into four genetically distinct populations , one widespread population , and three which have diverged due to small effective population sizes , possibly due to adaptation to the local environment The first of these is the population of lobsters from northern Norway , which have been referred to as the \" midnight @-@ sun lobster \".",
6
- "negative_response": "population of the been the adaptation can have found environment effective Arctic to , one Norwegian distinct be small fjords , is widespread the divided Tysfjorden in northern which and \". due the to three which The and the sizes northernmost , from into The have midnight sun possibly Nordfolda population diverged of as genetically due \" these first Circle lobster to population The lobsters populations , are , inside four Norway referred local to @-@ populations species"
7
- }
8
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "pile_10k",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "It is done, and submitted. You can play \u201cSurvival of the Tastiest\u201d on Android, and on the web. Playi...",
11
- "positive_response": "It was great seeing other people working \u2013 I had a few tabs opened on my second monitor all the time It\u2019s actually a bit sad, because if I could, I could have spent the whole weekend just watching other people working! But I had to do my own work, so I\u2019d only do it once in a while, when resting for a bit Design\n\nAlthough I wanted some simple, low-fi, high-contrast kind of design, I ended up going with somewhat realistic (vector) art.",
12
- "negative_response": "high-contrast tabs I few could, while, watching people Design a \u2013 in because with time I other second simple, somewhat the of bit once art. had some It (vector) so work, I\u2019d on just my monitor seeing weekend it design, wanted was own up all could other I going But if whole had have only my ended It\u2019s resting low-fi, people sad, working a opened do do spent realistic for working! the I a when I I great actually to bit a Although kind",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: 'It was great seeing other people working \u2013 I had a few tabs opened on my second monitor all the time It\u2019s actually a bit sad, because if I could, I could have spent the whole weekend just watching other people working! But I had to do my own work, so I\u2019d only do it once in a while, when resting for a bit Design\n\nAlthough I wanted some simple, low-fi, high-contrast kind of design, I ended up going with somewhat realistic (vector) art.' (log_prob=-0.500), Expected: 'It was great seeing other people working \u2013 I had a few tabs opened on my second monitor all the time It\u2019s actually a bit sad, because if I could, I could have spent the whole weekend just watching other people working! But I had to do my own work, so I\u2019d only do it once in a while, when resting for a bit Design\n\nAlthough I wanted some simple, low-fi, high-contrast kind of design, I ended up going with somewhat realistic (vector) art.'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: 'It was great seeing other people working \u2013 I had a few tabs opened on my second monitor all the time It\u2019s actually a bit sad, because if I could, I could have spent the whole weekend just watching other people working! But I had to do my own work, so I\u2019d only do it once in a while, when resting for a bit Design\n\nAlthough I wanted some simple, low-fi, high-contrast kind of design, I ended up going with somewhat realistic (vector) art.' (log_prob=-0.500), Expected: 'high-contrast tabs I few could, while, watching people Design a \u2013 in because with time I other second simple, somewhat the of bit once art. had some It (vector) so work, I\u2019d on just my monitor seeing weekend it design, wanted was own up all could other I going But if whole had have only my ended It\u2019s resting low-fi, people sad, working a opened do do spent realistic for working! the I a when I I great actually to bit a Although kind'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "It is done, and submitted. You can play \u201cSurvival of the Tastiest\u201d on Android, and on the web. Playing on the web works, but you have to simulate multi-touch for table moving and that can be a bit confusing. There\u2019s a lot I\u2019d like to talk about. I\u2019ll go through every topic, insted of making the typical what went right/wrong list. Concept\n\nWorking over the theme was probably one of the hardest tasks I had to face. Originally, I had an idea of what kind of game I wanted to develop, gameplay wise \u2013...",
5
- "positive_response": "It was great seeing other people working \u2013 I had a few tabs opened on my second monitor all the time It\u2019s actually a bit sad, because if I could, I could have spent the whole weekend just watching other people working! But I had to do my own work, so I\u2019d only do it once in a while, when resting for a bit Design\n\nAlthough I wanted some simple, low-fi, high-contrast kind of design, I ended up going with somewhat realistic (vector) art.",
6
- "negative_response": "high-contrast tabs I few could, while, watching people Design a \u2013 in because with time I other second simple, somewhat the of bit once art. had some It (vector) so work, I\u2019d on just my monitor seeing weekend it design, wanted was own up all could other I going But if whole had have only my ended It\u2019s resting low-fi, people sad, working a opened do do spent realistic for working! the I a when I I great actually to bit a Although kind"
7
- }
8
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "piqa",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "Question: How do I ready a guinea pig cage for it's new occupants?\nAnswer:\nA. Provide the guinea pig...",
11
- "positive_response": "Provide the guinea pig with a cage full of a few inches of bedding made of ripped paper strips, you will also need to supply it with a water bottle and a food dish.",
12
- "negative_response": "Provide the guinea pig with a cage full of a few inches of bedding made of ripped jeans material, you will also need to supply it with a water bottle and a food dish.",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: 'Provide the guinea pig with a cage full of a few inches of bedding made of ripped paper strips, you will also need to supply it with a water bottle and a food dish.' (log_prob=-0.500), Expected: 'Provide the guinea pig with a cage full of a few inches of bedding made of ripped paper strips, you will also need to supply it with a water bottle and a food dish.'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: 'Provide the guinea pig with a cage full of a few inches of bedding made of ripped paper strips, you will also need to supply it with a water bottle and a food dish.' (log_prob=-0.500), Expected: 'Provide the guinea pig with a cage full of a few inches of bedding made of ripped jeans material, you will also need to supply it with a water bottle and a food dish.'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "Question: How do I ready a guinea pig cage for it's new occupants?\nAnswer:\nA. Provide the guinea pig with a cage full of a few inches of bedding made of ripped paper strips, you will also need to supply it with a water bottle and a food dish.\nB. Provide the guinea pig with a cage full of a few inches of bedding made of ripped jeans material, you will also need to supply it with a water bottle and a food dish.",
5
- "positive_response": "Provide the guinea pig with a cage full of a few inches of bedding made of ripped paper strips, you will also need to supply it with a water bottle and a food dish.",
6
- "negative_response": "Provide the guinea pig with a cage full of a few inches of bedding made of ripped jeans material, you will also need to supply it with a water bottle and a food dish."
7
- }
8
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "polemo2",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "What is the sentiment of this Polish review?\nDo GTA4 podchodzi\u0142 em z wielkimi emocjami . By\u0142 em pewn...",
11
- "positive_response": "negative",
12
- "negative_response": "positive",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: 'negative' (log_prob=-0.500), Expected: 'negative'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: 'negative' (log_prob=-0.500), Expected: 'positive'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "What is the sentiment of this Polish review?\nDo GTA4 podchodzi\u0142 em z wielkimi emocjami . By\u0142 em pewny , \u017ce gra przykuje mnie na d\u0142ugi czas i dostarczy nie lada wra\u017ce\u0144 . Myli\u0142 em si\u0119 . Od pocz\u0105tku instalacja by\u0142a irytuj\u0105ca , bawienie si\u0119 z tymi z...",
5
- "positive_response": "negative",
6
- "negative_response": "positive"
7
- }
8
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "polymath_en_high",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "Question: Find the sum of all integer bases $b>9$ for which $17_{b}$ is a divisor of $97_{b}$.\n\nWhat...",
11
- "positive_response": "70",
12
- "negative_response": "71",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: '70' (log_prob=-0.500), Expected: '70'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: '70' (log_prob=-0.500), Expected: '71'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "Question: Find the sum of all integer bases $b>9$ for which $17_{b}$ is a divisor of $97_{b}$.\n\nWhat is the answer?",
5
- "positive_response": "70",
6
- "negative_response": "71"
7
- }
8
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "polymath_en_medium",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "Question: In $\\triangle ABC$, $(a+c)(\\sin A - \\sin C) = b(\\sin A - \\sin B)$, what is the value of $\\...",
11
- "positive_response": "$\\frac{\\pi}{3}$",
12
- "negative_response": "$\\frac{\\pi}{3}$ + 1",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: '$\\frac{\\pi}{3}$' (log_prob=-0.500), Expected: '$\\frac{\\pi}{3}$'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: '$\\frac{\\pi}{3}$' (log_prob=-0.500), Expected: '$\\frac{\\pi}{3}$ + 1'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "Question: In $\\triangle ABC$, $(a+c)(\\sin A - \\sin C) = b(\\sin A - \\sin B)$, what is the value of $\\angle C$ in radian?\n\nWhat is the answer?",
5
- "positive_response": "$\\frac{\\pi}{3}$",
6
- "negative_response": "$\\frac{\\pi}{3}$ + 1"
7
- }
8
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "polymath_zh_high",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "Question: \u6c42\u6ee1\u8db3 $17_{b}$ \u662f $97_{b}$ \u7684\u56e0\u5b50\u7684\u6240\u6709\u6574\u6570\u57fa\u6570 $b>9$ \u7684\u548c\u3002\n\nWhat is the answer?...",
11
- "positive_response": "70",
12
- "negative_response": "71",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: '70' (log_prob=-0.500), Expected: '70'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: '70' (log_prob=-0.500), Expected: '71'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "Question: \u6c42\u6ee1\u8db3 $17_{b}$ \u662f $97_{b}$ \u7684\u56e0\u5b50\u7684\u6240\u6709\u6574\u6570\u57fa\u6570 $b>9$ \u7684\u548c\u3002\n\nWhat is the answer?",
5
- "positive_response": "70",
6
- "negative_response": "71"
7
- }
8
- ]
@@ -1,30 +0,0 @@
1
- {
2
- "task_name": "polymath_zh_medium",
3
- "model_name": "mock",
4
- "evaluator_name": "log_likelihoods",
5
- "num_pairs": 1,
6
- "all_correct": true,
7
- "pairs": [
8
- {
9
- "pair_id": 0,
10
- "prompt": "Question: \u5728$\\triangle ABC$\u4e2d\uff0c\u5df2\u77e5$(a+c)(\\sin A - \\sin C) = b(\\sin A - \\sin B)$. \uff0c\u8bf7\u95ee$\\angle C$\u7684\u5f27\u5ea6\u503c\u662f\u591a\u5c11\uff1f\n\n...",
11
- "positive_response": "$\\frac{\\pi}{3}$",
12
- "negative_response": "$\\frac{\\pi}{3}$ + 1",
13
- "positive_evaluation": {
14
- "ground_truth": "TRUTHFUL",
15
- "confidence": 1.0,
16
- "expected": "TRUTHFUL",
17
- "correct": true,
18
- "details": "Predicted: '$\\frac{\\pi}{3}$' (log_prob=-0.500), Expected: '$\\frac{\\pi}{3}$'"
19
- },
20
- "negative_evaluation": {
21
- "ground_truth": "UNTRUTHFUL",
22
- "confidence": 0.0,
23
- "expected": "UNTRUTHFUL",
24
- "correct": true,
25
- "details": "Predicted: '$\\frac{\\pi}{3}$' (log_prob=-0.500), Expected: '$\\frac{\\pi}{3}$ + 1'"
26
- },
27
- "both_correct": true
28
- }
29
- ]
30
- }
@@ -1,8 +0,0 @@
1
- [
2
- {
3
- "pair_id": 0,
4
- "prompt": "Question: \u5728$\\triangle ABC$\u4e2d\uff0c\u5df2\u77e5$(a+c)(\\sin A - \\sin C) = b(\\sin A - \\sin B)$. \uff0c\u8bf7\u95ee$\\angle C$\u7684\u5f27\u5ea6\u503c\u662f\u591a\u5c11\uff1f\n\nWhat is the answer?",
5
- "positive_response": "$\\frac{\\pi}{3}$",
6
- "negative_response": "$\\frac{\\pi}{3}$ + 1"
7
- }
8
- ]