PyPI - wisent - Versions diffs - 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl - Mend

wisent 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (391) hide show

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_trivia.py CHANGED Viewed

@@ -130,14 +130,12 @@ class EusTriviaExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "eus_trivia",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_llm.py CHANGED Viewed

@@ -137,14 +137,12 @@ class EvalitaLlmExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "evalita_llm",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/financial.py CHANGED Viewed

@@ -80,12 +80,10 @@ class FinancialExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "financial"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/flan.py CHANGED Viewed

@@ -87,12 +87,10 @@ class FlanExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "flan"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench.py CHANGED Viewed

@@ -173,14 +173,12 @@ class FrenchBenchExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "french_bench",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench.py CHANGED Viewed

@@ -126,14 +126,12 @@ class GalicianBenchExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "galician_bench",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gaokao.py CHANGED Viewed

@@ -109,12 +109,12 @@ class GaokaoExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {query}\nA. {incorrect}\nB. {correct}"
+            prompt = f"Question: {query}"
             metadata = {"label": "gaokao"}
             return self._build_pair(
-                question=formatted_question,
+                question=prompt,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glianorex.py CHANGED Viewed

@@ -91,12 +91,10 @@ class GlianorexExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "glianorex"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_mmlu.py CHANGED Viewed

@@ -142,14 +142,12 @@ class GlobalMmluExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "global_mmlu",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_piqa.py CHANGED Viewed

@@ -123,14 +123,12 @@ class GlobalPiqaExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "global_piqa",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpt3.py CHANGED Viewed

@@ -83,12 +83,10 @@ class Gpt3Extractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "gpt3"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/groundcocoa.py CHANGED Viewed

@@ -155,14 +155,12 @@ class GroundcocoaExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "groundcocoa",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/haerae.py CHANGED Viewed

@@ -123,14 +123,12 @@ class HaeraeExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "haerae",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py CHANGED Viewed

@@ -83,14 +83,14 @@ class HeadQAExtractor(LMEvalBenchmarkExtractor):
             correct = answers[answer_idx]
             incorrect = answers[(answer_idx+1)%len(answers)]
-            formatted_question = f"Question: {qtext}\nAnswer:\nA. {incorrect}\nB. {correct}"
+            prompt = f"Question: {qtext}\nAnswer:"
             metadata = {
                 "label": "headqa",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=prompt,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py CHANGED Viewed

@@ -96,14 +96,14 @@ class HellaSwagExtractor(LMEvalBenchmarkExtractor):
             incorrect = max(incorrect_endings, key=len) if incorrect_endings else endings[(label+1)%len(endings)]
             question = f"{query}"
-            formatted_question = f"{question}\nA. {incorrect}\nB. {correct}"
+            prompt = f"{question}"
             metadata = {
                 "label": "hellaswag",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=prompt,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_ethics.py CHANGED Viewed

@@ -122,12 +122,10 @@ class HendrycksEthicsExtractor(LMEvalBenchmarkExtractor):
                 if not activity or not baseline:
                     return None
-                question = f"Which action results in greater overall happiness?\nA. {activity}\nB. {baseline}"
-                # For utilitarianism, we need to compare - just use A as correct for now
-                # (the actual rating field is empty in the data)
-                correct = "A"
-                incorrect = "B"
+                # Raw prompt - activity is correct, baseline is incorrect
+                question = "Which action results in greater overall happiness?"
+                correct = activity
+                incorrect = baseline
                 metadata = {"label": "hendrycks_ethics"}
@@ -196,14 +194,12 @@ class HendrycksEthicsExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "hendrycks_ethics",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_math.py CHANGED Viewed

@@ -138,39 +138,86 @@ class HendrycksMathExtractor(LMEvalBenchmarkExtractor):
         return None
-    def _create_incorrect_answer(self, correct: str) -> str:
+    def _create_incorrect_answer(self, correct: str, doc: dict = None) -> str:
         """
-        Create an incorrect answer by modifying the correct one.
+        Create a meaningful incorrect answer by using different plausible wrong values.
+        Strategy:
+        1. For integers: use a different integer (multiply by 2, subtract, etc.)
+        2. For fractions: change numerator/denominator in a plausible way
+        3. For expressions: provide a structurally different but plausible answer
         Args:
             correct: The correct answer
+            doc: Optional doc for context
         Returns:
-            An incorrect answer
+            A plausible but incorrect answer
         """
-        # Try to parse as number and modify it
+        import random
+        random.seed(hash(correct) % (2**32))  # Deterministic based on answer
+        # Try to parse as number and create plausible wrong answer
         try:
-            # Remove common LaTeX/math formatting
             clean = correct.replace('$', '').replace(',', '').replace('^\\circ', '').replace('^{\\circ}', '').strip()
             # Try integer
             num = int(clean)
-            return str(num + 1)
+            # Use various wrong transformations
+            wrong_transforms = [
+                num * 2,           # doubled
+                num // 2 if num > 1 else num * 3,  # halved or tripled
+                num - 1 if num > 0 else num + 2,   # off by different amount
+                num + 10,          # significantly different
+                abs(num) * -1 if num > 0 else abs(num),  # sign flip
+            ]
+            return str(random.choice(wrong_transforms))
         except ValueError:
             try:
                 # Try float
                 num = float(clean)
-                return str(num + 1.0)
+                wrong_transforms = [
+                    num * 2,
+                    num / 2,
+                    num - 0.5,
+                    num + 0.25,
+                    round(num) if num != round(num) else num + 0.5,
+                ]
+                return str(random.choice(wrong_transforms))
             except ValueError:
-                # Can't parse as number, create a modified version
-                # For fractions like \frac{8}{17}, modify numerator
-                frac_match = re.match(r'\\frac\{(\d+)\}\{(\d+)\}', correct)
-                if frac_match:
-                    num, denom = frac_match.groups()
-                    return f"\\frac{{{int(num) + 1}}}{{{denom}}}"
-                # For other cases, just append " + 1"
-                return f"{correct} + 1"
+                pass
+        # For fractions like \frac{8}{17}, create plausible wrong fraction
+        frac_match = re.match(r'\\frac\{(\d+)\}\{(\d+)\}', correct)
+        if frac_match:
+            num, denom = int(frac_match.group(1)), int(frac_match.group(2))
+            wrong_fracs = [
+                f"\\frac{{{denom}}}{{{num}}}",  # inverted
+                f"\\frac{{{num}}}{{{denom + 1}}}",  # different denominator
+                f"\\frac{{{num * 2}}}{{{denom}}}",  # doubled numerator
+            ]
+            return random.choice(wrong_fracs)
+        # For sqrt expressions
+        sqrt_match = re.search(r'\\sqrt\{(\d+)\}', correct)
+        if sqrt_match:
+            val = int(sqrt_match.group(1))
+            wrong_vals = [val + 1, val - 1 if val > 1 else val + 2, val * 2]
+            return correct.replace(f"\\sqrt{{{val}}}", f"\\sqrt{{{random.choice(wrong_vals)}}}")
+        # For pi expressions
+        if '\\pi' in correct:
+            if '2\\pi' in correct:
+                return correct.replace('2\\pi', '\\pi')
+            elif '\\pi' in correct:
+                return correct.replace('\\pi', '2\\pi')
+        # For other symbolic answers, provide common wrong alternatives
+        common_wrong = ['0', '1', '-1', '2', '\\infty', 'undefined']
+        if correct not in common_wrong:
+            return random.choice([w for w in common_wrong if w != correct])
+        return "incorrect"
     @staticmethod
     def _build_pair(

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/histoires_morales.py CHANGED Viewed

@@ -150,14 +150,12 @@ class HistoiresMoralesExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "histoires_morales",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hrm8k.py CHANGED Viewed

@@ -174,14 +174,12 @@ class Hrm8kExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "hrm8k",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval_infilling.py CHANGED Viewed

@@ -123,14 +123,12 @@ class HumanevalInfillingExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "humaneval_infilling",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/icelandic_winogrande.py CHANGED Viewed

@@ -123,14 +123,12 @@ class IcelandicWinograndeExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "icelandic_winogrande",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse.py CHANGED Viewed

@@ -80,12 +80,10 @@ class InverseExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "inverse"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse_scaling.py CHANGED Viewed

@@ -163,14 +163,12 @@ class InverseScalingExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "inverse_scaling",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ja.py CHANGED Viewed

@@ -80,12 +80,10 @@ class JaExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "ja"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard.py CHANGED Viewed

@@ -126,14 +126,12 @@ class JapaneseLeaderboardExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "japanese_leaderboard",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_mc.py CHANGED Viewed

@@ -103,7 +103,7 @@ class JapaneseLeaderboardMultipleChoiceExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\\nA. {incorrect}\\nB. {correct}"
+            prompt = f"Question: {question}"
             positive_response = PositiveResponse(model_response=correct)
             negative_response = NegativeResponse(model_response=incorrect)

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu.py CHANGED Viewed

@@ -139,14 +139,12 @@ class KmmluExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "kmmlu",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kobest.py CHANGED Viewed

@@ -136,14 +136,12 @@ class KobestExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "kobest",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kormedmcqa.py CHANGED Viewed

@@ -118,29 +118,17 @@ class KormedmcqaExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            # Format question with all choices
-            formatted_question = (
-                f"{question}\n"
-                f"A. {choices[0]}\n"
-                f"B. {choices[1]}\n"
-                f"C. {choices[2]}\n"
-                f"D. {choices[3]}\n"
-                f"E. {choices[4]}\n"
-                f"정답："
-            )
+            # Raw prompt without MC formatting
+            prompt = question
             metadata = {
                 "label": "kormedmcqa",
             }
-            # The correct answer is the letter (A-E)
-            correct_letter = chr(ord('A') + answer_idx)
-            incorrect_letter = chr(ord('A') + incorrect_idx)
             return self._build_pair(
-                question=formatted_question,
-                correct=correct_letter,
-                incorrect=incorrect_letter,
+                question=prompt,
+                correct=correct,
+                incorrect=incorrect,
                 metadata=metadata,
             )

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_cloze.py CHANGED Viewed

@@ -156,14 +156,12 @@ class LambadaClozeExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "lambada_cloze",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual.py CHANGED Viewed

@@ -156,14 +156,12 @@ class LambadaMultilingualExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "lambada_multilingual",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/law.py CHANGED Viewed

@@ -80,12 +80,10 @@ class LawExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "law"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl

wisent 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl