PyPI - wisent - Versions diffs - 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl - Mend

wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (725) hide show

wisent/examples/scripts/results/missing_benchmark_tags.json DELETED Viewed

@@ -1,341 +0,0 @@
-{
-  "20_newsgroups": {
-    "tags": []
-  },
-  "AraDiCE": {
-    "tags": []
-  },
-  "ArabCulture": {
-    "tags": []
-  },
-  "Tag": {
-    "tags": []
-  },
-  "acp_bench": {
-    "tags": []
-  },
-  "acp_bench_hard": {
-    "tags": []
-  },
-  "afrimgsm_direct_amh": {
-    "tags": []
-  },
-  "afrimmlu_direct_amh": {
-    "tags": []
-  },
-  "afrixnli_en_direct_amh": {
-    "tags": []
-  },
-  "ag_news": {
-    "tags": []
-  },
-  "aime": {
-    "tags": []
-  },
-  "aime2024": {
-    "tags": []
-  },
-  "aime2025": {
-    "tags": []
-  },
-  "apps": {
-    "tags": []
-  },
-  "arabic_exams": {
-    "tags": []
-  },
-  "argument_topic": {
-    "tags": []
-  },
-  "atis": {
-    "tags": []
-  },
-  "babilong": {
-    "tags": []
-  },
-  "bangla_mmlu": {
-    "tags": []
-  },
-  "banking77": {
-    "tags": []
-  },
-  "basque-glue": {
-    "tags": []
-  },
-  "bec2016eu": {
-    "tags": []
-  },
-  "benchmarks": {
-    "tags": []
-  },
-  "bhtc_v2": {
-    "tags": []
-  },
-  "boolq": {
-    "tags": []
-  },
-  "boolq-seq2seq": {
-    "tags": []
-  },
-  "cb": {
-    "tags": []
-  },
-  "chain_of_thought": {
-    "tags": []
-  },
-  "claim_stance_topic": {
-    "tags": []
-  },
-  "cnn_dailymail": {
-    "tags": []
-  },
-  "codexglue_code_to_text_go": {
-    "tags": []
-  },
-  "codexglue_code_to_text_java": {
-    "tags": []
-  },
-  "codexglue_code_to_text_javascript": {
-    "tags": []
-  },
-  "codexglue_code_to_text_php": {
-    "tags": []
-  },
-  "codexglue_code_to_text_python": {
-    "tags": []
-  },
-  "codexglue_code_to_text_ruby": {
-    "tags": []
-  },
-  "coedit_gec": {
-    "tags": []
-  },
-  "conala": {
-    "tags": []
-  },
-  "concode": {
-    "tags": []
-  },
-  "copa": {
-    "tags": []
-  },
-  "dbpedia_14": {
-    "tags": []
-  },
-  "doc_vqa": {
-    "tags": []
-  },
-  "ds1000": {
-    "tags": []
-  },
-  "ethos_binary": {
-    "tags": []
-  },
-  "evalita-mp": {
-    "tags": []
-  },
-  "evalita-sp_sum_task_fp-small_p1": {
-    "tags": []
-  },
-  "evalita_LLM": {
-    "tags": []
-  },
-  "financial_tweets": {
-    "tags": []
-  },
-  "flores": {
-    "tags": []
-  },
-  "freebase": {
-    "tags": []
-  },
-  "global_mmlu_ar": {
-    "tags": []
-  },
-  "gpt3_translation_benchmarks": {
-    "tags": []
-  },
-  "hmmt": {
-    "tags": []
-  },
-  "hmmt_feb_2025": {
-    "tags": []
-  },
-  "humaneval_64_instruct": {
-    "tags": []
-  },
-  "humaneval_instruct": {
-    "tags": []
-  },
-  "humanevalpack": {
-    "tags": []
-  },
-  "instruct_humaneval": {
-    "tags": []
-  },
-  "instructhumaneval": {
-    "tags": []
-  },
-  "iwslt2017-ar-en": {
-    "tags": []
-  },
-  "iwslt2017-en-ar": {
-    "tags": []
-  },
-  "law_stack_exchange": {
-    "tags": []
-  },
-  "ledgar": {
-    "tags": []
-  },
-  "livecodebench": {
-    "tags": []
-  },
-  "livemathbench_cnmo_en": {
-    "tags": []
-  },
-  "livemathbench_cnmo_zh": {
-    "tags": []
-  },
-  "llama": {
-    "tags": []
-  },
-  "logieval": {
-    "tags": []
-  },
-  "m_mmlu": {
-    "tags": []
-  },
-  "math": {
-    "tags": []
-  },
-  "math500": {
-    "tags": []
-  },
-  "mbpp_plus": {
-    "tags": []
-  },
-  "medical_abstracts": {
-    "tags": []
-  },
-  "mercury": {
-    "tags": []
-  },
-  "multimedqa": {
-    "tags": []
-  },
-  "multiple_choice": {
-    "tags": []
-  },
-  "non_greedy_robustness_agieval_aqua_rat": {
-    "tags": []
-  },
-  "okapi/arc_multilingual": {
-    "tags": []
-  },
-  "okapi/hellaswag_multilingual": {
-    "tags": []
-  },
-  "okapi/mmlu_multilingual": {
-    "tags": []
-  },
-  "okapi/truthfulqa_multilingual": {
-    "tags": []
-  },
-  "openllm": {
-    "tags": []
-  },
-  "option_order_robustness_agieval_aqua_rat": {
-    "tags": []
-  },
-  "penn_treebank": {
-    "tags": []
-  },
-  "phrases_ca-va": {
-    "tags": []
-  },
-  "polymath_en_high": {
-    "tags": []
-  },
-  "polymath_en_medium": {
-    "tags": []
-  },
-  "polymath_zh_high": {
-    "tags": []
-  },
-  "polymath_zh_medium": {
-    "tags": []
-  },
-  "prompt_robustness_agieval_aqua_rat": {
-    "tags": []
-  },
-  "ptb": {
-    "tags": []
-  },
-  "pythia": {
-    "tags": []
-  },
-  "recode": {
-    "tags": []
-  },
-  "record": {
-    "tags": []
-  },
-  "self_consistency": {
-    "tags": []
-  },
-  "sglue_rte": {
-    "tags": []
-  },
-  "squad2": {
-    "tags": []
-  },
-  "stsb": {
-    "tags": []
-  },
-  "super-glue-lm-eval-v1": {
-    "tags": []
-  },
-  "super-glue-lm-eval-v1-seq2seq": {
-    "tags": []
-  },
-  "super-glue-t5-prompt": {
-    "tags": []
-  },
-  "unfair_tos": {
-    "tags": []
-  },
-  "unitxt": {
-    "tags": []
-  },
-  "wikitext103": {
-    "tags": []
-  },
-  "wmt-ro-en-t5-prompt": {
-    "tags": []
-  },
-  "wmt14_en_fr": {
-    "tags": []
-  },
-  "wmt14_fr_en": {
-    "tags": []
-  },
-  "wmt16_de_en": {
-    "tags": []
-  },
-  "wmt16_en_de": {
-    "tags": []
-  },
-  "wmt16_en_ro": {
-    "tags": []
-  },
-  "wmt16_ro_en": {
-    "tags": []
-  },
-  "xsum": {
-    "tags": []
-  },
-  "yahoo_answers_topics": {
-    "tags": []
-  }
-}

wisent/examples/scripts/results/test_20_newsgroups_evaluation.json DELETED Viewed

@@ -1,30 +0,0 @@
-{
-  "task_name": "20_newsgroups",
-  "model_name": "mock",
-  "evaluator_name": "log_likelihoods",
-  "num_pairs": 1,
-  "all_correct": true,
-  "pairs": [
-    {
-      "pair_id": 0,
-      "prompt": "Classify the Topic of the following Text to one of these options: atheism, computer graphics, micros...",
-      "positive_response": "motorcycles",
-      "negative_response": "atheism",
-      "positive_evaluation": {
-        "ground_truth": "TRUTHFUL",
-        "confidence": 1.0,
-        "expected": "TRUTHFUL",
-        "correct": true,
-        "details": "Predicted: 'motorcycles' (log_prob=-0.500), Expected: 'motorcycles'"
-      },
-      "negative_evaluation": {
-        "ground_truth": "UNTRUTHFUL",
-        "confidence": 0.0,
-        "expected": "UNTRUTHFUL",
-        "correct": true,
-        "details": "Predicted: 'motorcycles' (log_prob=-0.500), Expected: 'atheism'"
-      },
-      "both_correct": true
-    }
-  ]
-}

wisent/examples/scripts/results/test_20_newsgroups_pairs.json DELETED Viewed

@@ -1,8 +0,0 @@
-[
-  {
-    "pair_id": 0,
-    "prompt": "Classify the Topic of the following Text to one of these options: atheism, computer graphics, microsoft windows, pc hardware, mac hardware, windows x, for sale, cars, motorcycles, baseball, hockey, cryptography, electronics, medicine, space, christianity, guns, middle east, politics, religion.\nText:\nI have a line on a Ducati 900GTS 1978 model with 17k on the clock.  Runs\nvery well, paint is the bronze/brown/orange faded out, leaks a bit of oil\nand pops out of 1st with hard accel.  The shop will fix trans and oil \nleak.  They sold the bike to the 1 and only owner.  They want $3495, and\nI am thinking more like $3K.  Any opinions out there?  Please email me.\nThanks.  It would be a nice stable mate to the Beemer.  Then I'll get\na jap bike and call myself Axis Motors!\n\n-- \n-----------------------------------------------------------------------\n\"Tuba\" (Irwin)      \"I honk therefore I am\"     CompuTrac-Richardson,Tx\nirwin@cmptrc.lonestar.org    DoD #0826          (R75/6\nTopic:",
-    "positive_response": "motorcycles",
-    "negative_response": "atheism"
-  }
-]

wisent/examples/scripts/results/test_AraDICE_evaluation.json DELETED Viewed

@@ -1,51 +0,0 @@
-{
-  "task_name": "AraDiCE",
-  "model_name": "mock",
-  "evaluator_name": "log_likelihoods",
-  "num_pairs": 2,
-  "all_correct": true,
-  "pairs": [
-    {
-      "pair_id": 0,
-      "prompt": "Question: \u0628\u0631\u0646\u0627\u0645\u062c \u0627\u0644\u0648\u064a\u0646\u062f\u0648\u0632 \u062d\u0627\u0648\u0644 \u064a\u062b\u0628\u062a \u0627\u0644\u062a\u062d\u062f\u062b\u064a\u0627\u062a \u0639\u0644\u0649 \u0627\u0644\u0643\u0645\u0628\u064a\u0648\u062a\u0631 \u0628\u0633 _ \u0639\u062f\u0649 \u0633\u0639\u0629 \u0627\u0644\u0628\u064a\u0627\u0646\u0627\u062a.\nA. \u0627\u0644\u0628\u0631\u0646\u0627\u0645\u062c\nB. \u0627\u0644...",
-      "positive_response": "\u0627\u0644\u0643\u0645\u0628\u064a\u0648\u062a\u0631",
-      "negative_response": "\u0627\u0644\u0628\u0631\u0646\u0627\u0645\u062c",
-      "positive_evaluation": {
-        "ground_truth": "TRUTHFUL",
-        "confidence": 1.0,
-        "expected": "TRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u0627\u0644\u0643\u0645\u0628\u064a\u0648\u062a\u0631' (log_prob=-0.500), Expected: '\u0627\u0644\u0643\u0645\u0628\u064a\u0648\u062a\u0631'"
-      },
-      "negative_evaluation": {
-        "ground_truth": "UNTRUTHFUL",
-        "confidence": 0.0,
-        "expected": "UNTRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u0627\u0644\u0643\u0645\u0628\u064a\u0648\u062a\u0631' (log_prob=-0.500), Expected: '\u0627\u0644\u0628\u0631\u0646\u0627\u0645\u062c'"
-      },
-      "both_correct": true
-    },
-    {
-      "pair_id": 1,
-      "prompt": "Question: \u0647\u0648 \u0641\u064a \u0648\u0627\u062d\u062f\u0629 \u0633\u062a \u0643\u0633\u0628\u062a \u0641\u064a \u0625\u0646\u062f\u064a\u0627\u0646\u0627\u0628\u0648\u0644\u064a\u0633 \u0665\u0660\u0660\nA. \u0646\u0639\u0645\nB. \u0644\u0627...",
-      "positive_response": "\u0644\u0627",
-      "negative_response": "\u0646\u0639\u0645",
-      "positive_evaluation": {
-        "ground_truth": "TRUTHFUL",
-        "confidence": 1.0,
-        "expected": "TRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u0644\u0627' (log_prob=-0.500), Expected: '\u0644\u0627'"
-      },
-      "negative_evaluation": {
-        "ground_truth": "UNTRUTHFUL",
-        "confidence": 0.0,
-        "expected": "UNTRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u0644\u0627' (log_prob=-0.500), Expected: '\u0646\u0639\u0645'"
-      },
-      "both_correct": true
-    }
-  ]
-}

wisent/examples/scripts/results/test_AraDICE_pairs.json DELETED Viewed

@@ -1,14 +0,0 @@
-[
-  {
-    "pair_id": 0,
-    "prompt": "Question: \u0628\u0631\u0646\u0627\u0645\u062c \u0627\u0644\u0648\u064a\u0646\u062f\u0648\u0632 \u062d\u0627\u0648\u0644 \u064a\u062b\u0628\u062a \u0627\u0644\u062a\u062d\u062f\u062b\u064a\u0627\u062a \u0639\u0644\u0649 \u0627\u0644\u0643\u0645\u0628\u064a\u0648\u062a\u0631 \u0628\u0633 _ \u0639\u062f\u0649 \u0633\u0639\u0629 \u0627\u0644\u0628\u064a\u0627\u0646\u0627\u062a.\nA. \u0627\u0644\u0628\u0631\u0646\u0627\u0645\u062c\nB. \u0627\u0644\u0643\u0645\u0628\u064a\u0648\u062a\u0631",
-    "positive_response": "\u0627\u0644\u0643\u0645\u0628\u064a\u0648\u062a\u0631",
-    "negative_response": "\u0627\u0644\u0628\u0631\u0646\u0627\u0645\u062c"
-  },
-  {
-    "pair_id": 1,
-    "prompt": "Question: \u0647\u0648 \u0641\u064a \u0648\u0627\u062d\u062f\u0629 \u0633\u062a \u0643\u0633\u0628\u062a \u0641\u064a \u0625\u0646\u062f\u064a\u0627\u0646\u0627\u0628\u0648\u0644\u064a\u0633 \u0665\u0660\u0660\nA. \u0646\u0639\u0645\nB. \u0644\u0627",
-    "positive_response": "\u0644\u0627",
-    "negative_response": "\u0646\u0639\u0645"
-  }
-]

wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_evaluation.json DELETED Viewed

@@ -1,30 +0,0 @@
-{
-  "task_name": "AraDiCE_boolq_egy",
-  "model_name": "mock",
-  "evaluator_name": "log_likelihoods",
-  "num_pairs": 1,
-  "all_correct": true,
-  "pairs": [
-    {
-      "pair_id": 0,
-      "prompt": "Question: \u0645\u0639\u0642\u0648\u0644 \u062a\u0628\u0642\u0649 \u0631\u0626\u064a\u0633\u0627 \u0644\u0648 \u0627\u0646\u062a \u0645\u0634 \u0645\u0646 \u0645\u0648\u0627\u0644\u064a\u062f \u0627\u0645\u0631\u064a\u0643\u0627\nA. \u0646\u0639\u0645\nB. \u0644\u0627...",
-      "positive_response": "\u0644\u0627",
-      "negative_response": "\u0646\u0639\u0645",
-      "positive_evaluation": {
-        "ground_truth": "TRUTHFUL",
-        "confidence": 1.0,
-        "expected": "TRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u0644\u0627' (log_prob=-0.500), Expected: '\u0644\u0627'"
-      },
-      "negative_evaluation": {
-        "ground_truth": "UNTRUTHFUL",
-        "confidence": 0.0,
-        "expected": "UNTRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u0644\u0627' (log_prob=-0.500), Expected: '\u0646\u0639\u0645'"
-      },
-      "both_correct": true
-    }
-  ]
-}

wisent/examples/scripts/results/test_AraDiCE_boolq_egy/test_AraDiCE_boolq_egy_pairs.json DELETED Viewed

@@ -1,8 +0,0 @@
-[
-  {
-    "pair_id": 0,
-    "prompt": "Question: \u0645\u0639\u0642\u0648\u0644 \u062a\u0628\u0642\u0649 \u0631\u0626\u064a\u0633\u0627 \u0644\u0648 \u0627\u0646\u062a \u0645\u0634 \u0645\u0646 \u0645\u0648\u0627\u0644\u064a\u062f \u0627\u0645\u0631\u064a\u0643\u0627\nA. \u0646\u0639\u0645\nB. \u0644\u0627",
-    "positive_response": "\u0644\u0627",
-    "negative_response": "\u0646\u0639\u0645"
-  }
-]

wisent/examples/scripts/results/test_ArabCulture_evaluation.json DELETED Viewed

@@ -1,51 +0,0 @@
-{
-  "task_name": "ArabCulture",
-  "model_name": "mock",
-  "evaluator_name": "log_likelihoods",
-  "num_pairs": 2,
-  "all_correct": true,
-  "pairs": [
-    {
-      "pair_id": 0,
-      "prompt": "Question: \u064a\u062a\u0645 \u062a\u0642\u0633\u064a\u0645 \u0627\u0644\u0648\u0631\u062b \u0648\u0641\u0642 \u0645\u0639\u0627\u064a\u064a\u0631 \u0645\u0639\u064a\u0646\u0629\nA.  \u064a\u0623\u062e\u0630 \u0627\u0644\u0634\u062e\u0635 \u0627\u0644\u0623\u0643\u0628\u0631 \u0633\u0646\u0627 \u0627\u0644\u062c\u0632\u0621 \u0627\u0644\u063a\u0627\u0644\u0628 \u0645\u0646 \u0627\u0644\u0648\u0631\u062b\nB. \u064a\u062a\u0645 \u0627\u0644...",
-      "positive_response": "\u064a\u062a\u0645 \u0627\u0644\u0627\u0639\u062a\u0645\u0627\u062f \u0639\u0644\u0649 \u0627\u0644\u062f\u064a\u0646 \u0627\u0644\u0627\u0633\u0644\u0627\u0645\u064a \u0648\u0644\u0643\u0646 \u0641\u064a \u0628\u0639\u0636 \u0627\u0644\u0623\u062d\u064a\u0627\u0646 \u064a\u0623\u062e\u0630 \u0627\u0644\u0630\u0643\u0648\u0631 \u0627\u0644\u0623\u0645\u0648\u0627\u0644 \u0648\u064a\u062a\u0631\u0643 \u0627\u0644\u0628\u064a\u062a \u0627\u0644\u0639\u0627\u0626\u0644\u064a \u0644\u0644\u0625\u0646\u0627\u062b",
-      "negative_response": " \u064a\u0623\u062e\u0630 \u0627\u0644\u0634\u062e\u0635 \u0627\u0644\u0623\u0643\u0628\u0631 \u0633\u0646\u0627 \u0627\u0644\u062c\u0632\u0621 \u0627\u0644\u063a\u0627\u0644\u0628 \u0645\u0646 \u0627\u0644\u0648\u0631\u062b",
-      "positive_evaluation": {
-        "ground_truth": "TRUTHFUL",
-        "confidence": 1.0,
-        "expected": "TRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u064a\u062a\u0645 \u0627\u0644\u0627\u0639\u062a\u0645\u0627\u062f \u0639\u0644\u0649 \u0627\u0644\u062f\u064a\u0646 \u0627\u0644\u0627\u0633\u0644\u0627\u0645\u064a \u0648\u0644\u0643\u0646 \u0641\u064a \u0628\u0639\u0636 \u0627\u0644\u0623\u062d\u064a\u0627\u0646 \u064a\u0623\u062e\u0630 \u0627\u0644\u0630\u0643\u0648\u0631 \u0627\u0644\u0623\u0645\u0648\u0627\u0644 \u0648\u064a\u062a\u0631\u0643 \u0627\u0644\u0628\u064a\u062a \u0627\u0644\u0639\u0627\u0626\u0644\u064a \u0644\u0644\u0625\u0646\u0627\u062b' (log_prob=-0.500), Expected: '\u064a\u062a\u0645 \u0627\u0644\u0627\u0639\u062a\u0645\u0627\u062f \u0639\u0644\u0649 \u0627\u0644\u062f\u064a\u0646 \u0627\u0644\u0627\u0633\u0644\u0627\u0645\u064a \u0648\u0644\u0643\u0646 \u0641\u064a \u0628\u0639\u0636 \u0627\u0644\u0623\u062d\u064a\u0627\u0646 \u064a\u0623\u062e\u0630 \u0627\u0644\u0630\u0643\u0648\u0631 \u0627\u0644\u0623\u0645\u0648\u0627\u0644 \u0648\u064a\u062a\u0631\u0643 \u0627\u0644\u0628\u064a\u062a \u0627\u0644\u0639\u0627\u0626\u0644\u064a \u0644\u0644\u0625\u0646\u0627\u062b'"
-      },
-      "negative_evaluation": {
-        "ground_truth": "UNTRUTHFUL",
-        "confidence": 0.0,
-        "expected": "UNTRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u064a\u062a\u0645 \u0627\u0644\u0627\u0639\u062a\u0645\u0627\u062f \u0639\u0644\u0649 \u0627\u0644\u062f\u064a\u0646 \u0627\u0644\u0627\u0633\u0644\u0627\u0645\u064a \u0648\u0644\u0643\u0646 \u0641\u064a \u0628\u0639\u0636 \u0627\u0644\u0623\u062d\u064a\u0627\u0646 \u064a\u0623\u062e\u0630 \u0627\u0644\u0630\u0643\u0648\u0631 \u0627\u0644\u0623\u0645\u0648\u0627\u0644 \u0648\u064a\u062a\u0631\u0643 \u0627\u0644\u0628\u064a\u062a \u0627\u0644\u0639\u0627\u0626\u0644\u064a \u0644\u0644\u0625\u0646\u0627\u062b' (log_prob=-0.500), Expected: ' \u064a\u0623\u062e\u0630 \u0627\u0644\u0634\u062e\u0635 \u0627\u0644\u0623\u0643\u0628\u0631 \u0633\u0646\u0627 \u0627\u0644\u062c\u0632\u0621 \u0627\u0644\u063a\u0627\u0644\u0628 \u0645\u0646 \u0627\u0644\u0648\u0631\u062b'"
-      },
-      "both_correct": true
-    },
-    {
-      "pair_id": 1,
-      "prompt": "Question: \u0627\u0639\u062a\u0627\u062f\u062a \u0623\u0631\u064a\u062c \u0639\u0644\u0649 \u0623\u0646 \u062a\u0623\u0643\u0644 \u0648\u062c\u0628\u0629 \u062e\u0641\u064a\u0641\u0629 \u0639\u0644\u0649 \u0627\u0644\u0639\u0634\u0627\u0621.\nA.  \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0643\u0633\u0643\u0633\u064a.\nB.  \u0647\u0630\u0627 ...",
-      "positive_response": " \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0637\u0627\u062c\u064a\u0646.",
-      "negative_response": " \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0643\u0633\u0643\u0633\u064a.",
-      "positive_evaluation": {
-        "ground_truth": "TRUTHFUL",
-        "confidence": 1.0,
-        "expected": "TRUTHFUL",
-        "correct": true,
-        "details": "Predicted: ' \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0637\u0627\u062c\u064a\u0646.' (log_prob=-0.500), Expected: ' \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0637\u0627\u062c\u064a\u0646.'"
-      },
-      "negative_evaluation": {
-        "ground_truth": "UNTRUTHFUL",
-        "confidence": 0.0,
-        "expected": "UNTRUTHFUL",
-        "correct": true,
-        "details": "Predicted: ' \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0637\u0627\u062c\u064a\u0646.' (log_prob=-0.500), Expected: ' \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0643\u0633\u0643\u0633\u064a.'"
-      },
-      "both_correct": true
-    }
-  ]
-}

wisent/examples/scripts/results/test_ArabCulture_pairs.json DELETED Viewed

@@ -1,14 +0,0 @@
-[
-  {
-    "pair_id": 0,
-    "prompt": "Question: \u064a\u062a\u0645 \u062a\u0642\u0633\u064a\u0645 \u0627\u0644\u0648\u0631\u062b \u0648\u0641\u0642 \u0645\u0639\u0627\u064a\u064a\u0631 \u0645\u0639\u064a\u0646\u0629\nA.  \u064a\u0623\u062e\u0630 \u0627\u0644\u0634\u062e\u0635 \u0627\u0644\u0623\u0643\u0628\u0631 \u0633\u0646\u0627 \u0627\u0644\u062c\u0632\u0621 \u0627\u0644\u063a\u0627\u0644\u0628 \u0645\u0646 \u0627\u0644\u0648\u0631\u062b\nB. \u064a\u062a\u0645 \u0627\u0644\u0627\u0639\u062a\u0645\u0627\u062f \u0639\u0644\u0649 \u0627\u0644\u062f\u064a\u0646 \u0627\u0644\u0627\u0633\u0644\u0627\u0645\u064a \u0648\u0644\u0643\u0646 \u0641\u064a \u0628\u0639\u0636 \u0627\u0644\u0623\u062d\u064a\u0627\u0646 \u064a\u0623\u062e\u0630 \u0627\u0644\u0630\u0643\u0648\u0631 \u0627\u0644\u0623\u0645\u0648\u0627\u0644 \u0648\u064a\u062a\u0631\u0643 \u0627\u0644\u0628\u064a\u062a \u0627\u0644\u0639\u0627\u0626\u0644\u064a \u0644\u0644\u0625\u0646\u0627\u062b",
-    "positive_response": "\u064a\u062a\u0645 \u0627\u0644\u0627\u0639\u062a\u0645\u0627\u062f \u0639\u0644\u0649 \u0627\u0644\u062f\u064a\u0646 \u0627\u0644\u0627\u0633\u0644\u0627\u0645\u064a \u0648\u0644\u0643\u0646 \u0641\u064a \u0628\u0639\u0636 \u0627\u0644\u0623\u062d\u064a\u0627\u0646 \u064a\u0623\u062e\u0630 \u0627\u0644\u0630\u0643\u0648\u0631 \u0627\u0644\u0623\u0645\u0648\u0627\u0644 \u0648\u064a\u062a\u0631\u0643 \u0627\u0644\u0628\u064a\u062a \u0627\u0644\u0639\u0627\u0626\u0644\u064a \u0644\u0644\u0625\u0646\u0627\u062b",
-    "negative_response": " \u064a\u0623\u062e\u0630 \u0627\u0644\u0634\u062e\u0635 \u0627\u0644\u0623\u0643\u0628\u0631 \u0633\u0646\u0627 \u0627\u0644\u062c\u0632\u0621 \u0627\u0644\u063a\u0627\u0644\u0628 \u0645\u0646 \u0627\u0644\u0648\u0631\u062b"
-  },
-  {
-    "pair_id": 1,
-    "prompt": "Question: \u0627\u0639\u062a\u0627\u062f\u062a \u0623\u0631\u064a\u062c \u0639\u0644\u0649 \u0623\u0646 \u062a\u0623\u0643\u0644 \u0648\u062c\u0628\u0629 \u062e\u0641\u064a\u0641\u0629 \u0639\u0644\u0649 \u0627\u0644\u0639\u0634\u0627\u0621.\nA.  \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0643\u0633\u0643\u0633\u064a.\nB.  \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0637\u0627\u062c\u064a\u0646.",
-    "positive_response": " \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0637\u0627\u062c\u064a\u0646.",
-    "negative_response": " \u0647\u0630\u0627 \u0627\u0644\u0645\u0633\u0627\u0621\u060c \u0637\u0628\u062e\u062a \u0623\u0631\u064a\u062c \u0627\u0644\u0643\u0633\u0643\u0633\u064a."
-  }
-]

wisent/examples/scripts/results/test_Tag_evaluation.json DELETED Viewed

@@ -1,30 +0,0 @@
-{
-  "task_name": "Tag",
-  "model_name": "mock",
-  "evaluator_name": "log_likelihoods",
-  "num_pairs": 1,
-  "all_correct": true,
-  "pairs": [
-    {
-      "pair_id": 0,
-      "prompt": "Database: california_schools\nQuery: Among the schools with the average score in Math over 560 in the...",
-      "positive_response": "71",
-      "negative_response": "[\"Del Amigo High (Continuation)\", \"Rancho del Mar High (Continuation)\", \"Millennium High Alternative\"]",
-      "positive_evaluation": {
-        "ground_truth": "TRUTHFUL",
-        "confidence": 1.0,
-        "expected": "TRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '71' (log_prob=-0.500), Expected: '71'"
-      },
-      "negative_evaluation": {
-        "ground_truth": "UNTRUTHFUL",
-        "confidence": 0.0,
-        "expected": "UNTRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '71' (log_prob=-0.500), Expected: '[\"Del Amigo High (Continuation)\", \"Rancho del Mar High (Continuation)\", \"Millennium High Alternative\"]'"
-      },
-      "both_correct": true
-    }
-  ]
-}

wisent/examples/scripts/results/test_Tag_pairs.json DELETED Viewed

@@ -1,8 +0,0 @@
-[
-  {
-    "pair_id": 0,
-    "prompt": "Database: california_schools\nQuery: Among the schools with the average score in Math over 560 in the SAT test, how many schools are in counties in the bay area?\nAnswer:",
-    "positive_response": "71",
-    "negative_response": "[\"Del Amigo High (Continuation)\", \"Rancho del Mar High (Continuation)\", \"Millennium High Alternative\"]"
-  }
-]

wisent/examples/scripts/results/test_aclue_evaluation.json DELETED Viewed

@@ -1,51 +0,0 @@
-{
-  "task_name": "aclue",
-  "model_name": "mock",
-  "evaluator_name": "log_likelihoods",
-  "num_pairs": 2,
-  "all_correct": true,
-  "pairs": [
-    {
-      "pair_id": 0,
-      "prompt": "Question: \u5728\u201c\u62ef\u9ece\u5143\u4e8e\u4ec1\u5bff\uff0c\u6d4e\u8d62\u52a3\u4ee5\u83b7\u5b89\u8005\u201d\u4e2d\uff0c\u201c\u8d62\u52a3\u201d\u4e4b\u4e49\u4e3a( )\nA. \u8d2b\u6c11\nB. \u4f53\u5f31\u591a\u75c5\u8005...",
-      "positive_response": "\u4f53\u5f31\u591a\u75c5\u8005",
-      "negative_response": "\u8d2b\u6c11",
-      "positive_evaluation": {
-        "ground_truth": "TRUTHFUL",
-        "confidence": 1.0,
-        "expected": "TRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u4f53\u5f31\u591a\u75c5\u8005' (log_prob=-0.500), Expected: '\u4f53\u5f31\u591a\u75c5\u8005'"
-      },
-      "negative_evaluation": {
-        "ground_truth": "UNTRUTHFUL",
-        "confidence": 0.0,
-        "expected": "UNTRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u4f53\u5f31\u591a\u75c5\u8005' (log_prob=-0.500), Expected: '\u8d2b\u6c11'"
-      },
-      "both_correct": true
-    },
-    {
-      "pair_id": 1,
-      "prompt": "Question: \u4ee5\u4e0b\u9009\u9879\u65ad\u53e5\u6b63\u786e\u7684\u662f\uff08\uff09\nA. \u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff\u80a1/\u5185/\u5ec9\u75db\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97/\u606f\u5c0f/\u4fbf\u9ec4/\u7537\u5b50\u5982/\u86ca\u5973\u5b50\u5982\u5a20\nB. \u4e3b\u819d\u75db\u5982\u9525...",
-      "positive_response": "\u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff/\u80a1\u5185\u5ec9\u75db/\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97\u606f/\u5c0f\u4fbf\u9ec4/\u7537\u5b50\u5982\u86ca/\u5973\u5b50\u5982\u5a20",
-      "negative_response": "\u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff\u80a1/\u5185/\u5ec9\u75db\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97/\u606f\u5c0f/\u4fbf\u9ec4/\u7537\u5b50\u5982/\u86ca\u5973\u5b50\u5982\u5a20",
-      "positive_evaluation": {
-        "ground_truth": "TRUTHFUL",
-        "confidence": 1.0,
-        "expected": "TRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff/\u80a1\u5185\u5ec9\u75db/\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97\u606f/\u5c0f\u4fbf\u9ec4/\u7537\u5b50\u5982\u86ca/\u5973\u5b50\u5982\u5a20' (log_prob=-0.500), Expected: '\u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff/\u80a1\u5185\u5ec9\u75db/\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97\u606f/\u5c0f\u4fbf\u9ec4/\u7537\u5b50\u5982\u86ca/\u5973\u5b50\u5982\u5a20'"
-      },
-      "negative_evaluation": {
-        "ground_truth": "UNTRUTHFUL",
-        "confidence": 0.0,
-        "expected": "UNTRUTHFUL",
-        "correct": true,
-        "details": "Predicted: '\u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff/\u80a1\u5185\u5ec9\u75db/\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97\u606f/\u5c0f\u4fbf\u9ec4/\u7537\u5b50\u5982\u86ca/\u5973\u5b50\u5982\u5a20' (log_prob=-0.500), Expected: '\u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff\u80a1/\u5185/\u5ec9\u75db\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97/\u606f\u5c0f/\u4fbf\u9ec4/\u7537\u5b50\u5982/\u86ca\u5973\u5b50\u5982\u5a20'"
-      },
-      "both_correct": true
-    }
-  ]
-}

wisent/examples/scripts/results/test_aclue_pairs.json DELETED Viewed

@@ -1,14 +0,0 @@
-[
-  {
-    "pair_id": 0,
-    "prompt": "Question: \u5728\u201c\u62ef\u9ece\u5143\u4e8e\u4ec1\u5bff\uff0c\u6d4e\u8d62\u52a3\u4ee5\u83b7\u5b89\u8005\u201d\u4e2d\uff0c\u201c\u8d62\u52a3\u201d\u4e4b\u4e49\u4e3a( )\nA. \u8d2b\u6c11\nB. \u4f53\u5f31\u591a\u75c5\u8005",
-    "positive_response": "\u4f53\u5f31\u591a\u75c5\u8005",
-    "negative_response": "\u8d2b\u6c11"
-  },
-  {
-    "pair_id": 1,
-    "prompt": "Question: \u4ee5\u4e0b\u9009\u9879\u65ad\u53e5\u6b63\u786e\u7684\u662f\uff08\uff09\nA. \u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff\u80a1/\u5185/\u5ec9\u75db\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97/\u606f\u5c0f/\u4fbf\u9ec4/\u7537\u5b50\u5982/\u86ca\u5973\u5b50\u5982\u5a20\nB. \u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff/\u80a1\u5185\u5ec9\u75db/\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97\u606f/\u5c0f\u4fbf\u9ec4/\u7537\u5b50\u5982\u86ca/\u5973\u5b50\u5982\u5a20",
-    "positive_response": "\u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff/\u80a1\u5185\u5ec9\u75db/\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97\u606f/\u5c0f\u4fbf\u9ec4/\u7537\u5b50\u5982\u86ca/\u5973\u5b50\u5982\u5a20",
-    "negative_response": "\u4e3b\u819d\u75db\u5982\u9525/\u4e0d\u5f97\u5c48\u4f38/\u820c\u7eb5\u6d8e\u4e0b/\u70e6\u9006/\u6eba\u96be/\u5c0f\u4fbf\u6025\u5f15\u9634\u75db/\u9634\u75ff\u80a1/\u5185/\u5ec9\u75db\u5987\u4eba\u6f0f\u4e0b\u4e0d\u6b62/\u8179\u80c0\u6ee1\u4e0d\u5f97/\u606f\u5c0f/\u4fbf\u9ec4/\u7537\u5b50\u5982/\u86ca\u5973\u5b50\u5982\u5a20"
-  }
-]

wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl

wisent 0.7.379__py3-none-any.whl → 0.7.701__py3-none-any.whl