wisent 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (391)
  1. wisent/__init__.py +1 -1
  2. wisent/comparison/__init__.py +1 -0
  3. wisent/comparison/detect_bos_features.py +275 -0
  4. wisent/comparison/fgaa.py +465 -0
  5. wisent/comparison/lora.py +669 -0
  6. wisent/comparison/lora_dpo.py +592 -0
  7. wisent/comparison/main.py +444 -0
  8. wisent/comparison/ours.py +76 -0
  9. wisent/comparison/sae.py +304 -0
  10. wisent/comparison/utils.py +381 -0
  11. wisent/core/activations/activation_cache.py +393 -0
  12. wisent/core/activations/activations.py +3 -3
  13. wisent/core/activations/activations_collector.py +12 -7
  14. wisent/core/activations/classifier_inference_strategy.py +12 -11
  15. wisent/core/activations/extraction_strategy.py +260 -84
  16. wisent/core/classifiers/classifiers/core/atoms.py +3 -2
  17. wisent/core/cli/__init__.py +2 -1
  18. wisent/core/cli/agent/train_classifier.py +16 -3
  19. wisent/core/cli/check_linearity.py +35 -3
  20. wisent/core/cli/cluster_benchmarks.py +4 -6
  21. wisent/core/cli/create_steering_vector.py +6 -4
  22. wisent/core/cli/diagnose_vectors.py +7 -4
  23. wisent/core/cli/estimate_unified_goodness_time.py +6 -4
  24. wisent/core/cli/generate_pairs_from_task.py +9 -56
  25. wisent/core/cli/generate_vector_from_task.py +11 -20
  26. wisent/core/cli/geometry_search.py +137 -0
  27. wisent/core/cli/get_activations.py +2 -2
  28. wisent/core/cli/method_optimizer.py +4 -3
  29. wisent/core/cli/modify_weights.py +3 -2
  30. wisent/core/cli/optimize_sample_size.py +1 -1
  31. wisent/core/cli/optimize_steering.py +14 -16
  32. wisent/core/cli/optimize_weights.py +2 -1
  33. wisent/core/cli/preview_pairs.py +203 -0
  34. wisent/core/cli/steering_method_trainer.py +3 -3
  35. wisent/core/cli/tasks.py +19 -76
  36. wisent/core/cli/train_unified_goodness.py +3 -3
  37. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +4 -4
  38. wisent/core/contrastive_pairs/diagnostics/linearity.py +7 -0
  39. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentic_search.py +37 -347
  40. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aider_polyglot.py +113 -136
  41. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codeforces.py +2 -12
  42. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/coding_benchmarks.py +124 -504
  43. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/faithbench.py +40 -63
  44. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flames.py +46 -89
  45. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flores.py +15 -4
  46. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/frames.py +36 -20
  47. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hallucinations_leaderboard.py +3 -45
  48. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livemathbench.py +42 -4
  49. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/longform_writing.py +2 -112
  50. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/math500.py +39 -4
  51. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/medium_priority_benchmarks.py +475 -525
  52. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mercury.py +65 -42
  53. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/olympiadbench.py +2 -12
  54. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/planbench.py +78 -219
  55. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/polymath.py +37 -4
  56. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/recode.py +84 -69
  57. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/refusalbench.py +168 -160
  58. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/simpleqa.py +44 -25
  59. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tau_bench.py +3 -103
  60. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolbench.py +3 -97
  61. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolemu.py +48 -182
  62. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +3 -0
  63. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +19 -1
  64. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aclue.py +1 -3
  65. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench.py +1 -3
  66. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench_hard.py +1 -3
  67. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/advanced.py +2 -4
  68. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aexams.py +1 -3
  69. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrimmlu.py +1 -3
  70. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrixnli.py +2 -2
  71. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabculture.py +1 -3
  72. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic.py +1 -3
  73. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_exams.py +1 -3
  74. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_complete.py +1 -3
  75. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_light.py +1 -3
  76. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabicmmlu.py +1 -3
  77. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aradice.py +1 -3
  78. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc.py +1 -3
  79. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +1 -2
  80. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +1 -2
  81. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +2 -2
  82. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +2 -2
  83. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/babi.py +36 -2
  84. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench.py +1 -3
  85. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bbq.py +1 -3
  86. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/belebele.py +1 -3
  87. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/benchmarks.py +1 -3
  88. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bertaqa.py +1 -3
  89. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhs.py +1 -3
  90. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhtc.py +3 -5
  91. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp.py +1 -3
  92. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp_nl.py +1 -3
  93. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +22 -5
  94. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/c4.py +1 -3
  95. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cabbq.py +1 -3
  96. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/careqa.py +1 -3
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench.py +1 -3
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalanqa.py +1 -3
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catcola.py +1 -3
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +10 -3
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval.py +1 -3
  102. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval_valid.py +1 -3
  103. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chain.py +1 -3
  104. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chartqa.py +1 -3
  105. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/claim.py +1 -3
  106. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/click.py +1 -3
  107. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cmmlu.py +1 -3
  108. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cnn.py +1 -3
  109. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cocoteros.py +1 -3
  110. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coedit.py +1 -3
  111. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense.py +1 -3
  112. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense_qa.py +1 -3
  113. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +2 -2
  114. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copal_id.py +1 -3
  115. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +3 -4
  116. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/csatqa.py +1 -3
  117. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cycle.py +1 -3
  118. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darija_bench.py +1 -3
  119. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijahellaswag.py +2 -6
  120. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijammlu.py +1 -3
  121. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/dbpedia.py +1 -3
  122. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/discrim_eval.py +1 -3
  123. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/doc.py +1 -3
  124. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +2 -2
  125. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/epec.py +1 -3
  126. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq.py +1 -3
  127. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench.py +1 -3
  128. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_ca.py +1 -3
  129. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_es.py +1 -3
  130. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/esbbq.py +1 -3
  131. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ethics.py +1 -3
  132. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus.py +1 -3
  133. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_exams.py +1 -3
  134. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_proficiency.py +1 -3
  135. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_reading.py +1 -3
  136. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_trivia.py +1 -3
  137. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_llm.py +1 -3
  138. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/financial.py +1 -3
  139. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/flan.py +1 -3
  140. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench.py +1 -3
  141. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench.py +1 -3
  142. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gaokao.py +2 -2
  143. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glianorex.py +1 -3
  144. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_mmlu.py +1 -3
  145. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_piqa.py +1 -3
  146. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpt3.py +1 -3
  147. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/groundcocoa.py +1 -3
  148. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/haerae.py +1 -3
  149. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +2 -2
  150. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +2 -2
  151. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_ethics.py +5 -9
  152. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_math.py +63 -16
  153. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/histoires_morales.py +1 -3
  154. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hrm8k.py +1 -3
  155. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval_infilling.py +1 -3
  156. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/icelandic_winogrande.py +1 -3
  157. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse.py +1 -3
  158. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse_scaling.py +1 -3
  159. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ja.py +1 -3
  160. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard.py +1 -3
  161. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_mc.py +1 -1
  162. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu.py +1 -3
  163. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kobest.py +1 -3
  164. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kormedmcqa.py +5 -17
  165. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_cloze.py +1 -3
  166. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual.py +1 -3
  167. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/law.py +1 -3
  168. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/leaderboard.py +1 -3
  169. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lingoly.py +1 -3
  170. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/llama3.py +1 -3
  171. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lm_syneval.py +1 -3
  172. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +2 -2
  173. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +2 -2
  174. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbench.py +1 -3
  175. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbenchv2.py +1 -3
  176. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mastermind.py +2 -4
  177. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +2 -2
  178. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/med_concepts_qa.py +2 -4
  179. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meddialog.py +1 -3
  180. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medical.py +1 -3
  181. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medmcqa.py +1 -3
  182. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +2 -2
  183. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mela.py +2 -2
  184. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/metabench.py +1 -3
  185. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/minerva_math.py +1 -3
  186. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu.py +1 -3
  187. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlusr.py +3 -4
  188. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +2 -2
  189. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multiblimp.py +2 -5
  190. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +2 -2
  191. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +2 -2
  192. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/non.py +1 -3
  193. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval.py +1 -3
  194. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_exact.py +1 -3
  195. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_gen_exact.py +1 -3
  196. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc.py +4 -8
  197. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc_log_likelihoods.py +4 -8
  198. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/nq_open.py +2 -2
  199. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_arc_multilingual.py +1 -3
  200. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_hellaswag_multilingual.py +1 -3
  201. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_mmlu_multilingual.py +1 -3
  202. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_truthfulqa_multilingual.py +2 -5
  203. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/olaph.py +1 -3
  204. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +2 -2
  205. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/option.py +1 -3
  206. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafraseja.py +1 -3
  207. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafrases.py +1 -3
  208. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws.py +1 -3
  209. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws_x.py +1 -3
  210. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +2 -2
  211. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/persona.py +1 -3
  212. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/phrases.py +1 -3
  213. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pile.py +1 -3
  214. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +2 -2
  215. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench.py +1 -3
  216. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prompt.py +1 -3
  217. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +2 -2
  218. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +2 -2
  219. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +2 -2
  220. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +2 -2
  221. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper_bool.py +2 -2
  222. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +2 -2
  223. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnlieu.py +1 -3
  224. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +2 -2
  225. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +2 -2
  226. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/random.py +1 -3
  227. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +2 -2
  228. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/reversed.py +1 -3
  229. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +2 -2
  230. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ruler.py +1 -3
  231. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +2 -2
  232. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/score.py +1 -3
  233. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls.py +1 -3
  234. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls_mc.py +1 -3
  235. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/self.py +1 -3
  236. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue.py +1 -3
  237. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue_rte.py +2 -1
  238. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/siqa.py +4 -7
  239. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +2 -2
  240. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench.py +1 -3
  241. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/storycloze.py +2 -6
  242. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/summarization.py +1 -3
  243. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super.py +1 -3
  244. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super_glue.py +1 -3
  245. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +2 -2
  246. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swde.py +1 -3
  247. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sycophancy.py +1 -3
  248. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/t0.py +1 -3
  249. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/teca.py +1 -3
  250. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyarc.py +1 -3
  251. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinybenchmarks.py +1 -3
  252. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinygsm8k.py +1 -3
  253. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyhellaswag.py +1 -3
  254. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinymmlu.py +1 -3
  255. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinytruthfulqa.py +1 -3
  256. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinywinogrande.py +1 -3
  257. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tmmluplus.py +1 -3
  258. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +2 -2
  259. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa.py +1 -3
  260. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +9 -4
  261. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +1 -3
  262. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turblimp_core.py +1 -3
  263. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu.py +1 -3
  264. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu_mc.py +0 -2
  265. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/unscramble.py +1 -3
  266. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/vaxx.py +2 -2
  267. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +2 -2
  268. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +3 -4
  269. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +2 -2
  270. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmdp.py +1 -3
  271. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +2 -2
  272. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +2 -2
  273. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc273.py +1 -3
  274. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xcopa.py +1 -3
  275. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xlsum.py +1 -3
  276. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +2 -2
  277. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xquad.py +2 -4
  278. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +2 -3
  279. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +2 -2
  280. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/zhoblimp.py +1 -3
  281. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +173 -6
  282. wisent/core/data_loaders/loaders/lm_loader.py +12 -1
  283. wisent/core/geometry_runner.py +995 -0
  284. wisent/core/geometry_search_space.py +237 -0
  285. wisent/core/hyperparameter_optimizer.py +1 -1
  286. wisent/core/main.py +3 -0
  287. wisent/core/models/core/atoms.py +5 -3
  288. wisent/core/models/wisent_model.py +1 -1
  289. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
  290. wisent/core/parser_arguments/check_linearity_parser.py +12 -2
  291. wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +2 -2
  292. wisent/core/parser_arguments/generate_vector_from_task_parser.py +6 -13
  293. wisent/core/parser_arguments/geometry_search_parser.py +61 -0
  294. wisent/core/parser_arguments/get_activations_parser.py +5 -14
  295. wisent/core/parser_arguments/main_parser.py +8 -0
  296. wisent/core/parser_arguments/train_unified_goodness_parser.py +2 -2
  297. wisent/core/steering.py +5 -3
  298. wisent/core/steering_methods/methods/hyperplane.py +2 -1
  299. wisent/core/synthetic/generators/nonsense_generator.py +30 -18
  300. wisent/core/trainers/steering_trainer.py +2 -2
  301. wisent/core/utils/device.py +27 -27
  302. wisent/core/utils/layer_combinations.py +70 -0
  303. wisent/examples/__init__.py +1 -0
  304. wisent/examples/scripts/__init__.py +1 -0
  305. wisent/examples/scripts/count_all_benchmarks.py +121 -0
  306. wisent/examples/scripts/discover_directions.py +469 -0
  307. wisent/examples/scripts/extract_benchmark_info.py +71 -0
  308. wisent/examples/scripts/search_all_short_names.py +31 -0
  309. wisent/examples/scripts/test_all_benchmarks.py +138 -0
  310. wisent/examples/scripts/test_all_benchmarks_new.py +28 -0
  311. wisent/examples/scripts/test_contrastive_pairs_all_supported.py +230 -0
  312. wisent/examples/scripts/test_nonsense_baseline.py +261 -0
  313. wisent/examples/scripts/test_one_benchmark.py +324 -0
  314. wisent/examples/scripts/test_one_coding_benchmark.py +293 -0
  315. wisent/parameters/lm_eval/broken_in_lm_eval.json +179 -2
  316. wisent/parameters/lm_eval/category_directions.json +137 -0
  317. wisent/parameters/lm_eval/repair_plan.json +282 -0
  318. wisent/parameters/lm_eval/weak_contrastive_pairs.json +38 -0
  319. wisent/parameters/lm_eval/working_benchmarks.json +206 -0
  320. wisent/parameters/lm_eval/working_benchmarks_categorized.json +236 -0
  321. wisent/tests/test_detector_accuracy.py +1 -1
  322. wisent/tests/visualize_geometry.py +1 -1
  323. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/METADATA +5 -1
  324. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/RECORD +328 -358
  325. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/browsecomp.py +0 -245
  326. wisent/examples/contrastive_pairs/humanization_human_vs_ai.json +0 -2112
  327. wisent/examples/scripts/1/test_basqueglue_evaluation.json +0 -51
  328. wisent/examples/scripts/1/test_basqueglue_pairs.json +0 -14
  329. wisent/examples/scripts/1/test_bec2016eu_evaluation.json +0 -51
  330. wisent/examples/scripts/1/test_bec2016eu_pairs.json +0 -14
  331. wisent/examples/scripts/1/test_belebele_evaluation.json +0 -51
  332. wisent/examples/scripts/1/test_belebele_pairs.json +0 -14
  333. wisent/examples/scripts/1/test_benchmarks_evaluation.json +0 -51
  334. wisent/examples/scripts/1/test_benchmarks_pairs.json +0 -14
  335. wisent/examples/scripts/1/test_bertaqa_evaluation.json +0 -51
  336. wisent/examples/scripts/1/test_bertaqa_pairs.json +0 -14
  337. wisent/examples/scripts/1/test_bhtc_v2_evaluation.json +0 -30
  338. wisent/examples/scripts/1/test_bhtc_v2_pairs.json +0 -8
  339. wisent/examples/scripts/1/test_boolq-seq2seq_evaluation.json +0 -30
  340. wisent/examples/scripts/1/test_boolq-seq2seq_pairs.json +0 -8
  341. wisent/examples/scripts/1/test_cabreu_evaluation.json +0 -30
  342. wisent/examples/scripts/1/test_cabreu_pairs.json +0 -8
  343. wisent/examples/scripts/1/test_careqa_en_evaluation.json +0 -30
  344. wisent/examples/scripts/1/test_careqa_en_pairs.json +0 -8
  345. wisent/examples/scripts/1/test_careqa_evaluation.json +0 -30
  346. wisent/examples/scripts/1/test_careqa_pairs.json +0 -8
  347. wisent/examples/scripts/1/test_catalanqa_evaluation.json +0 -30
  348. wisent/examples/scripts/1/test_catalanqa_pairs.json +0 -8
  349. wisent/examples/scripts/1/test_catcola_evaluation.json +0 -30
  350. wisent/examples/scripts/1/test_catcola_pairs.json +0 -8
  351. wisent/examples/scripts/1/test_chartqa_evaluation.json +0 -30
  352. wisent/examples/scripts/1/test_chartqa_pairs.json +0 -8
  353. wisent/examples/scripts/1/test_claim_stance_topic_evaluation.json +0 -30
  354. wisent/examples/scripts/1/test_claim_stance_topic_pairs.json +0 -8
  355. wisent/examples/scripts/1/test_cnn_dailymail_evaluation.json +0 -30
  356. wisent/examples/scripts/1/test_cnn_dailymail_pairs.json +0 -8
  357. wisent/examples/scripts/1/test_cocoteros_es_evaluation.json +0 -30
  358. wisent/examples/scripts/1/test_cocoteros_es_pairs.json +0 -8
  359. wisent/examples/scripts/1/test_coedit_gec_evaluation.json +0 -30
  360. wisent/examples/scripts/1/test_coedit_gec_pairs.json +0 -8
  361. wisent/examples/scripts/1/test_cola_evaluation.json +0 -30
  362. wisent/examples/scripts/1/test_cola_pairs.json +0 -8
  363. wisent/examples/scripts/1/test_coqcat_evaluation.json +0 -30
  364. wisent/examples/scripts/1/test_coqcat_pairs.json +0 -8
  365. wisent/examples/scripts/1/test_dbpedia_14_evaluation.json +0 -30
  366. wisent/examples/scripts/1/test_dbpedia_14_pairs.json +0 -8
  367. wisent/examples/scripts/1/test_epec_koref_bin_evaluation.json +0 -30
  368. wisent/examples/scripts/1/test_epec_koref_bin_pairs.json +0 -8
  369. wisent/examples/scripts/1/test_ethos_binary_evaluation.json +0 -30
  370. wisent/examples/scripts/1/test_ethos_binary_pairs.json +0 -8
  371. wisent/examples/scripts/2/test_afrimgsm_direct_amh_evaluation.json +0 -30
  372. wisent/examples/scripts/2/test_afrimgsm_direct_amh_pairs.json +0 -8
  373. wisent/examples/scripts/2/test_afrimmlu_direct_amh_evaluation.json +0 -30
  374. wisent/examples/scripts/2/test_afrimmlu_direct_amh_pairs.json +0 -8
  375. wisent/examples/scripts/2/test_afrixnli_en_direct_amh_evaluation.json +0 -30
  376. wisent/examples/scripts/2/test_afrixnli_en_direct_amh_pairs.json +0 -8
  377. wisent/examples/scripts/2/test_arc_ar_evaluation.json +0 -30
  378. wisent/examples/scripts/2/test_arc_ar_pairs.json +0 -8
  379. wisent/examples/scripts/2/test_atis_evaluation.json +0 -30
  380. wisent/examples/scripts/2/test_atis_pairs.json +0 -8
  381. wisent/examples/scripts/2/test_babi_evaluation.json +0 -30
  382. wisent/examples/scripts/2/test_babi_pairs.json +0 -8
  383. wisent/examples/scripts/2/test_babilong_evaluation.json +0 -30
  384. wisent/examples/scripts/2/test_babilong_pairs.json +0 -8
  385. wisent/examples/scripts/2/test_bangla_mmlu_evaluation.json +0 -30
  386. wisent/examples/scripts/2/test_bangla_mmlu_pairs.json +0 -8
  387. wisent/examples/scripts/2/test_basque-glue_pairs.json +0 -14
  388. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/WHEEL +0 -0
  389. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/entry_points.txt +0 -0
  390. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/licenses/LICENSE +0 -0
  391. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/top_level.txt +0 -0
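Most of the extractor hunks that follow share a single pattern: the inline prompt formatting that baked both options into the question (Question: ...\nA. {incorrect}\nB. {correct}) is deleted, and the raw question is passed to _build_pair instead, so the contrast is carried entirely by the correct/incorrect responses. A minimal sketch of the before/after behavior, using a hypothetical stand-in for _build_pair (the real method is defined on LMEvalBenchmarkExtractor and is not part of this diff):

# Hypothetical stand-in for LMEvalBenchmarkExtractor._build_pair, used only
# to illustrate the recurring change in the hunks below.
def build_pair(question: str, correct: str, incorrect: str) -> dict:
    return {"prompt": question, "positive": correct, "negative": incorrect}

question, correct, incorrect = "What color is the sky?", "blue", "green"

# Old behavior: the options were baked into the prompt text.
old = build_pair(f"Question: {question}\nA. {incorrect}\nB. {correct}", correct, incorrect)

# New behavior: the raw question is the prompt; the responses carry the contrast.
new = build_pair(question, correct, incorrect)

assert "A. green" in old["prompt"]
assert new["prompt"] == "What color is the sky?"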
@@ -178,14 +178,12 @@ class AcpBenchExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "acp_bench",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -156,14 +156,12 @@ class AcpBenchHardExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "acp_bench_hard",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -66,7 +66,7 @@ class AdvancedExtractor(LMEvalBenchmarkExtractor):
         metadata = {"label": "advanced_ai_risk"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -103,12 +103,10 @@ class AdvancedExtractor(LMEvalBenchmarkExtractor):
         correct = str(choices[answer_idx]).strip()
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = str(choices[incorrect_idx]).strip()
-
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
         metadata = {"label": "advanced"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -155,14 +155,12 @@ class AexamsExtractor(LMEvalBenchmarkExtractor):
             )
             return None
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "aexams",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -86,12 +86,10 @@ class AfrimmluExtractor(LMEvalBenchmarkExtractor):
         correct = str(choices[answer_idx]).strip()
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = str(choices[incorrect_idx]).strip()
-
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
         metadata = {"label": "afrimmlu"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -101,8 +101,8 @@ class AfrixnliExtractor(LMEvalBenchmarkExtractor):
             return None
         incorrect = label_map[incorrect_labels[0]]
 
-        # Format the NLI prompt
-        prompt = f"Premise: {premise}\nHypothesis: {hypothesis}.\nA. {incorrect}\nB. {correct}"
+        # Raw prompt without A./B. formatting
+        prompt = f"Premise: {premise}\nHypothesis: {hypothesis}"
 
         metadata = {"label": "afrixnli"}
 
@@ -151,14 +151,12 @@ class ArabcultureExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "arabculture",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -71,12 +71,10 @@ class ArabicExtractor(LMEvalBenchmarkExtractor):
         correct = choices[answer_idx]
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
-
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
         metadata = {"label": "arabic"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -77,12 +77,10 @@ class ArabicExamsExtractor(LMEvalBenchmarkExtractor):
         correct = str(choices[answer_idx]).strip()
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = str(choices[incorrect_idx]).strip()
-
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
         metadata = {"label": "arabic_exams"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -139,14 +139,12 @@ class ArabicLeaderboardCompleteExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "arabic_leaderboard_complete",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -139,14 +139,12 @@ class ArabicLeaderboardLightExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "arabic_leaderboard_light",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -138,14 +138,12 @@ class ArabicmmluExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "arabicmmlu",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -241,12 +241,10 @@ class AradiceExtractor(LMEvalBenchmarkExtractor):
         correct = str(choices[answer_idx]).strip()
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = str(choices[incorrect_idx]).strip()
-
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
         metadata = {"label": "aradice"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -106,12 +106,10 @@ class ArcExtractor(LMEvalBenchmarkExtractor):
                 extra={"doc": doc},
             )
             return None
-
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
         metadata = {"label": "arc"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -89,14 +89,13 @@ class ArcChallengeExtractor(LMEvalBenchmarkExtractor):
         incorrect = choices[(answer_idx+1)%len(choices)]
 
         question = f"{question}"
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
 
         metadata = {
             "label": "arc_easy",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -89,14 +89,13 @@ class ArcEasyExtractor(LMEvalBenchmarkExtractor):
         incorrect = choices[(answer_idx+1)%len(choices)]
 
         question = f"{question}"
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
 
         metadata = {
             "label": "arc_easy",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -85,14 +85,14 @@ class ArithmeticExtractor(LMEvalBenchmarkExtractor):
         incorrect_val = float(completion) + 1
         incorrect = str(int(incorrect_val)) if incorrect_val == int(incorrect_val) else str(incorrect_val)
 
-        formatted_question = f"{context}\nA. {incorrect}\nB. {correct}"
+        prompt = f"{context}"
 
         metadata = {
             "label": "arithmetic",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=prompt,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -93,14 +93,14 @@ class ASDivExtractor(LMEvalBenchmarkExtractor):
         incorrect_val = float(numerical_answer) + 1
         incorrect = str(int(incorrect_val)) if incorrect_val == int(incorrect_val) else str(incorrect_val)
 
-        formatted_question = f"{body}\nQuestion:{question}\nA. {incorrect}\nB. {correct}"
+        prompt = f"{body}\nQuestion:{question}"
 
         metadata = {
             "label": "asdiv",
         }
 
        return self._build_pair(
-            question=formatted_question,
+            question=prompt,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
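Both math extractors above (arithmetic and asdiv) build the distractor as the correct value plus one, then drop a trailing ".0" when the result is whole. A quick illustration of that formatting branch:

# Illustration of the int-vs-float distractor formatting shared by the
# arithmetic and asdiv hunks above.
for completion in ("5", "2.5"):
    incorrect_val = float(completion) + 1
    incorrect = str(int(incorrect_val)) if incorrect_val == int(incorrect_val) else str(incorrect_val)
    print(completion, "->", incorrect)  # prints: 5 -> 6, then 2.5 -> 3.5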
@@ -82,8 +82,42 @@ class BabiExtractor(LMEvalBenchmarkExtractor):
             log.debug("Skipping doc due to missing/invalid fields", extra={"doc": doc})
             return None
 
-        # Create an incorrect answer by appending "incorrect" or using a generic wrong answer
-        incorrect = f"not {correct}"
+        # Create an incorrect answer using plausible alternatives from babi vocabulary
+        import random
+        random.seed(hash(correct + passage) % (2**32))
+
+        # Common babi answer categories
+        locations = ['bathroom', 'bedroom', 'kitchen', 'garden', 'hallway', 'office', 'park']
+        people = ['Mary', 'John', 'Sandra', 'Daniel', 'Bill', 'Fred', 'Julie', 'Emily']
+        objects = ['football', 'apple', 'milk', 'keys', 'box', 'ball']
+        directions = ['north', 'south', 'east', 'west']
+        animals = ['cat', 'dog', 'mouse', 'wolf', 'sheep', 'lion']
+        yes_no = ['yes', 'no']
+
+        # Determine answer type and pick a wrong alternative
+        correct_lower = correct.lower()
+        if correct_lower in [l.lower() for l in locations]:
+            incorrect = random.choice([l for l in locations if l.lower() != correct_lower])
+        elif correct_lower in [p.lower() for p in people]:
+            incorrect = random.choice([p for p in people if p.lower() != correct_lower])
+        elif correct_lower in [o.lower() for o in objects]:
+            incorrect = random.choice([o for o in objects if o.lower() != correct_lower])
+        elif correct_lower in [d.lower() for d in directions]:
+            incorrect = random.choice([d for d in directions if d.lower() != correct_lower])
+        elif correct_lower in [a.lower() for a in animals]:
+            incorrect = random.choice([a for a in animals if a.lower() != correct_lower])
+        elif correct_lower in yes_no:
+            incorrect = 'no' if correct_lower == 'yes' else 'yes'
+        elif correct.isdigit():
+            num = int(correct)
+            incorrect = str(random.choice([n for n in [num-1, num+1, num*2] if n != num and n >= 0]))
+        else:
+            # Fallback: use a generic wrong answer from the passage words
+            passage_words = [w for w in passage.split() if len(w) > 3 and w.isalpha() and w.lower() != correct_lower]
+            if passage_words:
+                incorrect = random.choice(passage_words)
+            else:
+                incorrect = "unknown"
 
         # Format the prompt with passage and question
         prompt = f"Passage: {passage}\n\nQuestion: {question}"
@@ -126,14 +126,12 @@ class BasqueBenchExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "basque_bench",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -140,14 +140,12 @@ class BbqExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "bbq",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -152,14 +152,12 @@ class BelebeleExtractor(LMEvalBenchmarkExtractor):
             )
             return None
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "belebele",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -126,14 +126,12 @@ class BenchmarksExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "benchmarks",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -136,14 +136,12 @@ class BertaqaExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "bertaqa",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -126,14 +126,12 @@ class BhsExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "bhs",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -72,11 +72,11 @@ class BhtcExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = str(choices[incorrect_idx]).strip()
 
-        formatted_question = f"Text: {question}\nQuestion: What is the topic of the above text?\nA. {incorrect}\nB. {correct}"
+        prompt = f"Text: {question}\nQuestion: What is the topic of the above text?"
         metadata = {"label": "bhtc"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -116,12 +116,10 @@ class BhtcExtractor(LMEvalBenchmarkExtractor):
         correct = str(choices[answer_idx]).strip()
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = str(choices[incorrect_idx]).strip()
-
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
         metadata = {"label": "bhtc"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -142,14 +142,12 @@ class BlimpExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "blimp",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -123,14 +123,12 @@ class BlimpNlExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "blimp_nl",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -85,17 +85,17 @@ class BoolQExtractor(LMEvalBenchmarkExtractor):
             )
             return None
 
-        correct = "Yes" if label == 1 else "No"
-        incorrect = "No" if label == 1 else "Yes"
+        correct = "yes" if label == 1 else "no"
+        incorrect = "no" if label == 1 else "yes"
 
-        formatted_question = f"{passage}\nQuestion: {question}?\nAnswer:\nA. {incorrect}\nB. {correct}"
+        prompt = f"{passage}\nQuestion: {question}?\nAnswer:"
 
         metadata = {
             "label": "boolq",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=prompt,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -114,4 +114,21 @@ class BoolQExtractor(LMEvalBenchmarkExtractor):
     ) -> ContrastivePair:
         positive_response = PositiveResponse(model_response=correct)
         negative_response = NegativeResponse(model_response=incorrect)
-        return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))
+        return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))
+
+    @staticmethod
+    def extract_choices_and_answer(task, doc: dict[str, Any]) -> tuple[list[str], str]:
+        """
+        Extract choices and expected answer from a BoolQ document.
+
+        Args:
+            task: lm-eval task instance (has doc_to_choice, doc_to_target methods)
+            doc: BoolQ document
+
+        Returns:
+            Tuple of (choices, expected_answer)
+        """
+        choices = task.doc_to_choice(doc)
+        target_idx = task.doc_to_target(doc)
+        expected = choices[target_idx]
+        return choices, expected
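The new extract_choices_and_answer helper delegates to the lm-eval task's doc_to_choice/doc_to_target and assumes doc_to_target returns an integer index into the choice list. A usage sketch with a stub task object (the stub is illustrative, not the lm-eval API):

# Usage sketch for the new static helper; StubTask is a hypothetical stand-in
# for an lm-eval task whose doc_to_target returns an integer index.
class StubTask:
    def doc_to_choice(self, doc):
        return ["no", "yes"]

    def doc_to_target(self, doc):
        return int(doc["label"])

doc = {"question": "is the sky blue", "label": 1}
choices, expected = BoolQExtractor.extract_choices_and_answer(StubTask(), doc)
# choices == ["no", "yes"], expected == "yes"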
@@ -121,12 +121,10 @@ class C4Extractor(LMEvalBenchmarkExtractor):
         correct = choices[answer_idx]
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
-
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
         metadata = {"label": "c4"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -123,14 +123,12 @@ class CabbqExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "cabbq",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -140,14 +140,12 @@ class CareqaExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "careqa",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -126,14 +126,12 @@ class CatalanBenchExtractor(LMEvalBenchmarkExtractor):
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = choices[incorrect_idx]
 
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
         metadata = {
             "label": "catalan_bench",
         }
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,
@@ -144,12 +144,10 @@ class CatalanqaExtractor(LMEvalBenchmarkExtractor):
         correct = str(choices[answer_idx]).strip()
         incorrect_idx = (answer_idx + 1) % len(choices)
         incorrect = str(choices[incorrect_idx]).strip()
-
-        formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
         metadata = {"label": "catalanqa"}
 
         return self._build_pair(
-            question=formatted_question,
+            question=question,
             correct=correct,
             incorrect=incorrect,
             metadata=metadata,