wisent-0.7.701-py3-none-any.whl → wisent-0.7.1045-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (391)
  1. wisent/__init__.py +1 -1
  2. wisent/comparison/__init__.py +1 -0
  3. wisent/comparison/detect_bos_features.py +275 -0
  4. wisent/comparison/fgaa.py +465 -0
  5. wisent/comparison/lora.py +669 -0
  6. wisent/comparison/lora_dpo.py +592 -0
  7. wisent/comparison/main.py +444 -0
  8. wisent/comparison/ours.py +76 -0
  9. wisent/comparison/sae.py +304 -0
  10. wisent/comparison/utils.py +381 -0
  11. wisent/core/activations/activation_cache.py +393 -0
  12. wisent/core/activations/activations.py +3 -3
  13. wisent/core/activations/activations_collector.py +12 -7
  14. wisent/core/activations/classifier_inference_strategy.py +12 -11
  15. wisent/core/activations/extraction_strategy.py +260 -84
  16. wisent/core/classifiers/classifiers/core/atoms.py +3 -2
  17. wisent/core/cli/__init__.py +2 -1
  18. wisent/core/cli/agent/train_classifier.py +16 -3
  19. wisent/core/cli/check_linearity.py +35 -3
  20. wisent/core/cli/cluster_benchmarks.py +4 -6
  21. wisent/core/cli/create_steering_vector.py +6 -4
  22. wisent/core/cli/diagnose_vectors.py +7 -4
  23. wisent/core/cli/estimate_unified_goodness_time.py +6 -4
  24. wisent/core/cli/generate_pairs_from_task.py +9 -56
  25. wisent/core/cli/generate_vector_from_task.py +11 -20
  26. wisent/core/cli/geometry_search.py +137 -0
  27. wisent/core/cli/get_activations.py +2 -2
  28. wisent/core/cli/method_optimizer.py +4 -3
  29. wisent/core/cli/modify_weights.py +3 -2
  30. wisent/core/cli/optimize_sample_size.py +1 -1
  31. wisent/core/cli/optimize_steering.py +14 -16
  32. wisent/core/cli/optimize_weights.py +2 -1
  33. wisent/core/cli/preview_pairs.py +203 -0
  34. wisent/core/cli/steering_method_trainer.py +3 -3
  35. wisent/core/cli/tasks.py +19 -76
  36. wisent/core/cli/train_unified_goodness.py +3 -3
  37. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +4 -4
  38. wisent/core/contrastive_pairs/diagnostics/linearity.py +7 -0
  39. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/agentic_search.py +37 -347
  40. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/aider_polyglot.py +113 -136
  41. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codeforces.py +2 -12
  42. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/coding_benchmarks.py +124 -504
  43. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/faithbench.py +40 -63
  44. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flames.py +46 -89
  45. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/flores.py +15 -4
  46. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/frames.py +36 -20
  47. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/hallucinations_leaderboard.py +3 -45
  48. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/livemathbench.py +42 -4
  49. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/longform_writing.py +2 -112
  50. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/math500.py +39 -4
  51. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/medium_priority_benchmarks.py +475 -525
  52. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/mercury.py +65 -42
  53. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/olympiadbench.py +2 -12
  54. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/planbench.py +78 -219
  55. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/polymath.py +37 -4
  56. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/recode.py +84 -69
  57. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/refusalbench.py +168 -160
  58. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/simpleqa.py +44 -25
  59. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/tau_bench.py +3 -103
  60. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolbench.py +3 -97
  61. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/toolemu.py +48 -182
  62. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +3 -0
  63. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +19 -1
  64. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aclue.py +1 -3
  65. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench.py +1 -3
  66. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/acp_bench_hard.py +1 -3
  67. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/advanced.py +2 -4
  68. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aexams.py +1 -3
  69. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrimmlu.py +1 -3
  70. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/afrixnli.py +2 -2
  71. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabculture.py +1 -3
  72. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic.py +1 -3
  73. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_exams.py +1 -3
  74. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_complete.py +1 -3
  75. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabic_leaderboard_light.py +1 -3
  76. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arabicmmlu.py +1 -3
  77. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/aradice.py +1 -3
  78. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc.py +1 -3
  79. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +1 -2
  80. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +1 -2
  81. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +2 -2
  82. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +2 -2
  83. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/babi.py +36 -2
  84. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/basque_bench.py +1 -3
  85. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bbq.py +1 -3
  86. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/belebele.py +1 -3
  87. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/benchmarks.py +1 -3
  88. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bertaqa.py +1 -3
  89. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhs.py +1 -3
  90. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/bhtc.py +3 -5
  91. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp.py +1 -3
  92. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/blimp_nl.py +1 -3
  93. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +22 -5
  94. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/c4.py +1 -3
  95. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cabbq.py +1 -3
  96. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/careqa.py +1 -3
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalan_bench.py +1 -3
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catalanqa.py +1 -3
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/catcola.py +1 -3
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +10 -3
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval.py +1 -3
  102. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ceval_valid.py +1 -3
  103. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chain.py +1 -3
  104. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/chartqa.py +1 -3
  105. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/claim.py +1 -3
  106. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/click.py +1 -3
  107. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cmmlu.py +1 -3
  108. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cnn.py +1 -3
  109. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cocoteros.py +1 -3
  110. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coedit.py +1 -3
  111. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense.py +1 -3
  112. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/commonsense_qa.py +1 -3
  113. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +2 -2
  114. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copal_id.py +1 -3
  115. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +3 -4
  116. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/csatqa.py +1 -3
  117. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cycle.py +1 -3
  118. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darija_bench.py +1 -3
  119. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijahellaswag.py +2 -6
  120. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/darijammlu.py +1 -3
  121. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/dbpedia.py +1 -3
  122. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/discrim_eval.py +1 -3
  123. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/doc.py +1 -3
  124. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +2 -2
  125. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/epec.py +1 -3
  126. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq.py +1 -3
  127. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench.py +1 -3
  128. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_ca.py +1 -3
  129. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eq_bench_es.py +1 -3
  130. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/esbbq.py +1 -3
  131. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ethics.py +1 -3
  132. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus.py +1 -3
  133. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_exams.py +1 -3
  134. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_proficiency.py +1 -3
  135. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_reading.py +1 -3
  136. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/eus_trivia.py +1 -3
  137. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/evalita_llm.py +1 -3
  138. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/financial.py +1 -3
  139. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/flan.py +1 -3
  140. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/french_bench.py +1 -3
  141. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/galician_bench.py +1 -3
  142. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gaokao.py +2 -2
  143. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/glianorex.py +1 -3
  144. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_mmlu.py +1 -3
  145. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/global_piqa.py +1 -3
  146. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gpt3.py +1 -3
  147. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/groundcocoa.py +1 -3
  148. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/haerae.py +1 -3
  149. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +2 -2
  150. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +2 -2
  151. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_ethics.py +5 -9
  152. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hendrycks_math.py +63 -16
  153. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/histoires_morales.py +1 -3
  154. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hrm8k.py +1 -3
  155. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval_infilling.py +1 -3
  156. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/icelandic_winogrande.py +1 -3
  157. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse.py +1 -3
  158. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse_scaling.py +1 -3
  159. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ja.py +1 -3
  160. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard.py +1 -3
  161. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_mc.py +1 -1
  162. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu.py +1 -3
  163. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kobest.py +1 -3
  164. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kormedmcqa.py +5 -17
  165. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_cloze.py +1 -3
  166. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual.py +1 -3
  167. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/law.py +1 -3
  168. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/leaderboard.py +1 -3
  169. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lingoly.py +1 -3
  170. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/llama3.py +1 -3
  171. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lm_syneval.py +1 -3
  172. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +2 -2
  173. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +2 -2
  174. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbench.py +1 -3
  175. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbenchv2.py +1 -3
  176. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mastermind.py +2 -4
  177. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +2 -2
  178. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/med_concepts_qa.py +2 -4
  179. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meddialog.py +1 -3
  180. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medical.py +1 -3
  181. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medmcqa.py +1 -3
  182. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +2 -2
  183. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mela.py +2 -2
  184. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/metabench.py +1 -3
  185. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/minerva_math.py +1 -3
  186. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlu.py +1 -3
  187. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mmlusr.py +3 -4
  188. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +2 -2
  189. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multiblimp.py +2 -5
  190. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +2 -2
  191. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +2 -2
  192. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/non.py +1 -3
  193. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval.py +1 -3
  194. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_exact.py +1 -3
  195. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_gen_exact.py +1 -3
  196. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc.py +4 -8
  197. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/noreval_mc_log_likelihoods.py +4 -8
  198. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/nq_open.py +2 -2
  199. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_arc_multilingual.py +1 -3
  200. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_hellaswag_multilingual.py +1 -3
  201. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_mmlu_multilingual.py +1 -3
  202. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/okapi_truthfulqa_multilingual.py +2 -5
  203. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/olaph.py +1 -3
  204. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +2 -2
  205. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/option.py +1 -3
  206. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafraseja.py +1 -3
  207. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/parafrases.py +1 -3
  208. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws.py +1 -3
  209. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/paws_x.py +1 -3
  210. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +2 -2
  211. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/persona.py +1 -3
  212. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/phrases.py +1 -3
  213. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pile.py +1 -3
  214. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +2 -2
  215. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/portuguese_bench.py +1 -3
  216. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prompt.py +1 -3
  217. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +2 -2
  218. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +2 -2
  219. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +2 -2
  220. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +2 -2
  221. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper_bool.py +2 -2
  222. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +2 -2
  223. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnlieu.py +1 -3
  224. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +2 -2
  225. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +2 -2
  226. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/random.py +1 -3
  227. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +2 -2
  228. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/reversed.py +1 -3
  229. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +2 -2
  230. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ruler.py +1 -3
  231. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +2 -2
  232. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/score.py +1 -3
  233. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls.py +1 -3
  234. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/scrolls_mc.py +1 -3
  235. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/self.py +1 -3
  236. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue.py +1 -3
  237. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sglue_rte.py +2 -1
  238. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/siqa.py +4 -7
  239. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +2 -2
  240. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/spanish_bench.py +1 -3
  241. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/storycloze.py +2 -6
  242. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/summarization.py +1 -3
  243. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super.py +1 -3
  244. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/super_glue.py +1 -3
  245. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +2 -2
  246. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swde.py +1 -3
  247. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sycophancy.py +1 -3
  248. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/t0.py +1 -3
  249. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/teca.py +1 -3
  250. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyarc.py +1 -3
  251. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinybenchmarks.py +1 -3
  252. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinygsm8k.py +1 -3
  253. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinyhellaswag.py +1 -3
  254. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinymmlu.py +1 -3
  255. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinytruthfulqa.py +1 -3
  256. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tinywinogrande.py +1 -3
  257. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/tmmluplus.py +1 -3
  258. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +2 -2
  259. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa.py +1 -3
  260. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +9 -4
  261. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +1 -3
  262. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turblimp_core.py +1 -3
  263. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu.py +1 -3
  264. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/turkishmmlu_mc.py +0 -2
  265. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/unscramble.py +1 -3
  266. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/vaxx.py +2 -2
  267. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +2 -2
  268. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +3 -4
  269. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +2 -2
  270. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wmdp.py +1 -3
  271. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +2 -2
  272. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +2 -2
  273. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc273.py +1 -3
  274. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xcopa.py +1 -3
  275. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xlsum.py +1 -3
  276. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +2 -2
  277. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xquad.py +2 -4
  278. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +2 -3
  279. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +2 -2
  280. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/zhoblimp.py +1 -3
  281. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +173 -6
  282. wisent/core/data_loaders/loaders/lm_loader.py +12 -1
  283. wisent/core/geometry_runner.py +995 -0
  284. wisent/core/geometry_search_space.py +237 -0
  285. wisent/core/hyperparameter_optimizer.py +1 -1
  286. wisent/core/main.py +3 -0
  287. wisent/core/models/core/atoms.py +5 -3
  288. wisent/core/models/wisent_model.py +1 -1
  289. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
  290. wisent/core/parser_arguments/check_linearity_parser.py +12 -2
  291. wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +2 -2
  292. wisent/core/parser_arguments/generate_vector_from_task_parser.py +6 -13
  293. wisent/core/parser_arguments/geometry_search_parser.py +61 -0
  294. wisent/core/parser_arguments/get_activations_parser.py +5 -14
  295. wisent/core/parser_arguments/main_parser.py +8 -0
  296. wisent/core/parser_arguments/train_unified_goodness_parser.py +2 -2
  297. wisent/core/steering.py +5 -3
  298. wisent/core/steering_methods/methods/hyperplane.py +2 -1
  299. wisent/core/synthetic/generators/nonsense_generator.py +30 -18
  300. wisent/core/trainers/steering_trainer.py +2 -2
  301. wisent/core/utils/device.py +27 -27
  302. wisent/core/utils/layer_combinations.py +70 -0
  303. wisent/examples/__init__.py +1 -0
  304. wisent/examples/scripts/__init__.py +1 -0
  305. wisent/examples/scripts/count_all_benchmarks.py +121 -0
  306. wisent/examples/scripts/discover_directions.py +469 -0
  307. wisent/examples/scripts/extract_benchmark_info.py +71 -0
  308. wisent/examples/scripts/search_all_short_names.py +31 -0
  309. wisent/examples/scripts/test_all_benchmarks.py +138 -0
  310. wisent/examples/scripts/test_all_benchmarks_new.py +28 -0
  311. wisent/examples/scripts/test_contrastive_pairs_all_supported.py +230 -0
  312. wisent/examples/scripts/test_nonsense_baseline.py +261 -0
  313. wisent/examples/scripts/test_one_benchmark.py +324 -0
  314. wisent/examples/scripts/test_one_coding_benchmark.py +293 -0
  315. wisent/parameters/lm_eval/broken_in_lm_eval.json +179 -2
  316. wisent/parameters/lm_eval/category_directions.json +137 -0
  317. wisent/parameters/lm_eval/repair_plan.json +282 -0
  318. wisent/parameters/lm_eval/weak_contrastive_pairs.json +38 -0
  319. wisent/parameters/lm_eval/working_benchmarks.json +206 -0
  320. wisent/parameters/lm_eval/working_benchmarks_categorized.json +236 -0
  321. wisent/tests/test_detector_accuracy.py +1 -1
  322. wisent/tests/visualize_geometry.py +1 -1
  323. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/METADATA +5 -1
  324. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/RECORD +328 -358
  325. wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/browsecomp.py +0 -245
  326. wisent/examples/contrastive_pairs/humanization_human_vs_ai.json +0 -2112
  327. wisent/examples/scripts/1/test_basqueglue_evaluation.json +0 -51
  328. wisent/examples/scripts/1/test_basqueglue_pairs.json +0 -14
  329. wisent/examples/scripts/1/test_bec2016eu_evaluation.json +0 -51
  330. wisent/examples/scripts/1/test_bec2016eu_pairs.json +0 -14
  331. wisent/examples/scripts/1/test_belebele_evaluation.json +0 -51
  332. wisent/examples/scripts/1/test_belebele_pairs.json +0 -14
  333. wisent/examples/scripts/1/test_benchmarks_evaluation.json +0 -51
  334. wisent/examples/scripts/1/test_benchmarks_pairs.json +0 -14
  335. wisent/examples/scripts/1/test_bertaqa_evaluation.json +0 -51
  336. wisent/examples/scripts/1/test_bertaqa_pairs.json +0 -14
  337. wisent/examples/scripts/1/test_bhtc_v2_evaluation.json +0 -30
  338. wisent/examples/scripts/1/test_bhtc_v2_pairs.json +0 -8
  339. wisent/examples/scripts/1/test_boolq-seq2seq_evaluation.json +0 -30
  340. wisent/examples/scripts/1/test_boolq-seq2seq_pairs.json +0 -8
  341. wisent/examples/scripts/1/test_cabreu_evaluation.json +0 -30
  342. wisent/examples/scripts/1/test_cabreu_pairs.json +0 -8
  343. wisent/examples/scripts/1/test_careqa_en_evaluation.json +0 -30
  344. wisent/examples/scripts/1/test_careqa_en_pairs.json +0 -8
  345. wisent/examples/scripts/1/test_careqa_evaluation.json +0 -30
  346. wisent/examples/scripts/1/test_careqa_pairs.json +0 -8
  347. wisent/examples/scripts/1/test_catalanqa_evaluation.json +0 -30
  348. wisent/examples/scripts/1/test_catalanqa_pairs.json +0 -8
  349. wisent/examples/scripts/1/test_catcola_evaluation.json +0 -30
  350. wisent/examples/scripts/1/test_catcola_pairs.json +0 -8
  351. wisent/examples/scripts/1/test_chartqa_evaluation.json +0 -30
  352. wisent/examples/scripts/1/test_chartqa_pairs.json +0 -8
  353. wisent/examples/scripts/1/test_claim_stance_topic_evaluation.json +0 -30
  354. wisent/examples/scripts/1/test_claim_stance_topic_pairs.json +0 -8
  355. wisent/examples/scripts/1/test_cnn_dailymail_evaluation.json +0 -30
  356. wisent/examples/scripts/1/test_cnn_dailymail_pairs.json +0 -8
  357. wisent/examples/scripts/1/test_cocoteros_es_evaluation.json +0 -30
  358. wisent/examples/scripts/1/test_cocoteros_es_pairs.json +0 -8
  359. wisent/examples/scripts/1/test_coedit_gec_evaluation.json +0 -30
  360. wisent/examples/scripts/1/test_coedit_gec_pairs.json +0 -8
  361. wisent/examples/scripts/1/test_cola_evaluation.json +0 -30
  362. wisent/examples/scripts/1/test_cola_pairs.json +0 -8
  363. wisent/examples/scripts/1/test_coqcat_evaluation.json +0 -30
  364. wisent/examples/scripts/1/test_coqcat_pairs.json +0 -8
  365. wisent/examples/scripts/1/test_dbpedia_14_evaluation.json +0 -30
  366. wisent/examples/scripts/1/test_dbpedia_14_pairs.json +0 -8
  367. wisent/examples/scripts/1/test_epec_koref_bin_evaluation.json +0 -30
  368. wisent/examples/scripts/1/test_epec_koref_bin_pairs.json +0 -8
  369. wisent/examples/scripts/1/test_ethos_binary_evaluation.json +0 -30
  370. wisent/examples/scripts/1/test_ethos_binary_pairs.json +0 -8
  371. wisent/examples/scripts/2/test_afrimgsm_direct_amh_evaluation.json +0 -30
  372. wisent/examples/scripts/2/test_afrimgsm_direct_amh_pairs.json +0 -8
  373. wisent/examples/scripts/2/test_afrimmlu_direct_amh_evaluation.json +0 -30
  374. wisent/examples/scripts/2/test_afrimmlu_direct_amh_pairs.json +0 -8
  375. wisent/examples/scripts/2/test_afrixnli_en_direct_amh_evaluation.json +0 -30
  376. wisent/examples/scripts/2/test_afrixnli_en_direct_amh_pairs.json +0 -8
  377. wisent/examples/scripts/2/test_arc_ar_evaluation.json +0 -30
  378. wisent/examples/scripts/2/test_arc_ar_pairs.json +0 -8
  379. wisent/examples/scripts/2/test_atis_evaluation.json +0 -30
  380. wisent/examples/scripts/2/test_atis_pairs.json +0 -8
  381. wisent/examples/scripts/2/test_babi_evaluation.json +0 -30
  382. wisent/examples/scripts/2/test_babi_pairs.json +0 -8
  383. wisent/examples/scripts/2/test_babilong_evaluation.json +0 -30
  384. wisent/examples/scripts/2/test_babilong_pairs.json +0 -8
  385. wisent/examples/scripts/2/test_bangla_mmlu_evaluation.json +0 -30
  386. wisent/examples/scripts/2/test_bangla_mmlu_pairs.json +0 -8
  387. wisent/examples/scripts/2/test_basque-glue_pairs.json +0 -14
  388. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/WHEEL +0 -0
  389. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/entry_points.txt +0 -0
  390. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/licenses/LICENSE +0 -0
  391. {wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/top_level.txt +0 -0
@@ -124,15 +124,14 @@ class SIQAExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (label_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     full_question = f"Context: {context}\nQuestion: {question}"
-     formatted_question = f"{full_question}\nA. {incorrect}\nB. {correct}"
+     prompt = f"Context: {context}\nQuestion: {question}"

      metadata = {
          "label": "siqa",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
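(Note: the same refactor repeats through the extractor hunks below: the "A. {incorrect}\nB. {correct}" listing is dropped from the prompt, and the two options now travel only as the positive and negative responses of the contrastive pair. A minimal sketch of the resulting data flow, assuming `_build_pair` wraps its arguments into a `ContrastivePair` the way the TruthfulQAMC1Extractor hunk further down shows; the plain-dict stand-in and example values here are illustrative, not package code:)

    # Sketch of the shape these extractors now produce (illustrative stand-in;
    # the real code builds ContrastivePair/PositiveResponse/NegativeResponse).
    def build_pair(question: str, correct: str, incorrect: str, label: str) -> dict:
        return {
            "prompt": question,              # bare prompt, no "A./B." listing appended
            "positive_response": correct,    # becomes PositiveResponse.model_response
            "negative_response": incorrect,  # becomes NegativeResponse.model_response
            "label": label,
        }

    pair = build_pair(
        question="Context: Jan left the party early.\nQuestion: Why did Jan leave?",
        correct="Jan felt tired",
        incorrect="Jan felt energized",  # rotated choice: (label_idx + 1) % len(choices)
        label="siqa",
    )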
@@ -171,16 +170,14 @@ class SIQAExtractor(LMEvalBenchmarkExtractor):
      for line in inputs.split('\n'):
          if line.strip() and not line.strip().startswith('choice:'):
              question_lines.append(line.strip())
-     question = '\n'.join(question_lines)
-
-     formatted_question = f"{question}\nA. {incorrect}\nB. {correct}"
+     prompt = '\n'.join(question_lines)

      metadata = {
          "label": "siqa",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -85,14 +85,14 @@ class SocialIQAExtractor(LMEvalBenchmarkExtractor):
      correct = answers[label]
      incorrect = answers[(label+1)%len(answers)]

-     formatted_question = f"Q: {context} {question}\nA:\nA. {incorrect}\nB. {correct}"
+     prompt = f"Q: {context} {question}\nA:"

      metadata = {
          "label": "social_iqa",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -126,14 +126,12 @@ class SpanishBenchExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "spanish_bench",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -162,14 +162,12 @@ class StoryclozeExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "storycloze",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -199,14 +197,12 @@ class StoryclozeExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "storycloze",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -80,12 +80,10 @@ class SummarizationExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "summarization"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -80,12 +80,10 @@ class SuperExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "super"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -125,14 +125,12 @@ class SuperGlueExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "super_glue",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -86,14 +86,14 @@ class SwagExtractor(LMEvalBenchmarkExtractor):
      incorrect = endings[(label+1)%len(endings)]

      question = f"{startphrase}"
-     formatted_question = f"{question}\nA. {incorrect}\nB. {correct}"
+     prompt = f"{question}"

      metadata = {
          "label": "swag",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -150,14 +150,12 @@ class SwdeExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "swde",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -90,12 +90,10 @@ class SycophancyExtractor(LMEvalBenchmarkExtractor):
      correct = answer_not_matching
      # The sycophantic answer is the incorrect (negative) response
      incorrect = answer_matching
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "sycophancy"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -83,12 +83,10 @@ class T0Extractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "t0"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -83,12 +83,10 @@ class TecaExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "teca"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -83,12 +83,10 @@ class TinyarcExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "tinyarc"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -126,14 +126,12 @@ class TinybenchmarksExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "tinybenchmarks",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -83,12 +83,10 @@ class Tinygsm8kExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "tinygsm8k"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -83,12 +83,10 @@ class TinyhellaswagExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "tinyhellaswag"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -83,12 +83,10 @@ class TinymmluExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "tinymmlu"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -86,12 +86,10 @@ class TinytruthfulqaExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "tinytruthfulqa"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -83,12 +83,10 @@ class TinywinograndeExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "tinywinogrande"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -152,14 +152,12 @@ class TmmluplusExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "tmmluplus",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -101,14 +101,14 @@ class TriviaQAExtractor(LMEvalBenchmarkExtractor):
      if incorrect == correct:
          incorrect += "k"

-     formatted_question = f"Question: {question}\nAnswer:\nA. {incorrect}\nB. {correct}"
+     prompt = f"Question: {question}\nAnswer:"

      metadata = {
          "label": "triviaqa",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -85,12 +85,10 @@ class TruthfulqaExtractor(LMEvalBenchmarkExtractor):
      correct = str(choices[answer_idx]).strip()
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()
-
-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
      metadata = {"label": "truthfulqa"}

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -91,14 +91,12 @@ class TruthfulQAMC1Extractor(LMEvalBenchmarkExtractor):
      correct = options[answer_idx]
      incorrect = options[(answer_idx+1)%len(options)]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "truthfulqa_mc1",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -117,4 +115,11 @@ class TruthfulQAMC1Extractor(LMEvalBenchmarkExtractor):
      ) -> ContrastivePair:
          positive_response = PositiveResponse(model_response=correct)
          negative_response = NegativeResponse(model_response=incorrect)
-         return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))
+         return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))
+
+     @staticmethod
+     def extract_choices_and_answer(task, doc: dict[str, Any]) -> tuple[list[str], str]:
+         choices = task.doc_to_choice(doc)
+         target_idx = task.doc_to_target(doc)
+         expected = choices[target_idx]
+         return choices, expected
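(The new `extract_choices_and_answer` helper leans on lm-eval's `doc_to_choice`/`doc_to_target` task accessors and implicitly assumes `doc_to_target` returns an integer index into the choice list, since it does `choices[target_idx]`. A hedged usage sketch with a stub task; the stub class and the document values are illustrative, not part of the package:)

    from typing import Any

    from wisent.core.contrastive_pairs.lm_eval_pairs.lm_task_extractors.truthfulqa_mc1 import (
        TruthfulQAMC1Extractor,
    )

    class StubTask:
        """Illustrative stand-in for an lm-eval task; real tasks expose the
        same doc_to_choice/doc_to_target accessors the helper calls."""

        def doc_to_choice(self, doc: dict[str, Any]) -> list[str]:
            return doc["mc1_targets"]["choices"]

        def doc_to_target(self, doc: dict[str, Any]) -> int:
            # Assumption: the target resolves to an int index, as choices[target_idx] requires.
            return doc["mc1_targets"]["labels"].index(1)

    doc = {"mc1_targets": {"choices": ["Nothing happens", "You turn into a wolf"], "labels": [1, 0]}}
    choices, expected = TruthfulQAMC1Extractor.extract_choices_and_answer(StubTask(), doc)
    assert expected == "Nothing happens"
    # The incorrect option can then be rotated off the answer, as elsewhere in the file:
    incorrect = choices[(choices.index(expected) + 1) % len(choices)]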
@@ -111,14 +111,12 @@ class TruthfulQAMC2Extractor(LMEvalBenchmarkExtractor):
      correct = min(correct_answers, key=len)
      incorrect = max(incorrect_answers, key=len)

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "truthfulqa_mc2",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -123,14 +123,12 @@ class TurblimpCoreExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "turblimp_core",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -132,14 +132,12 @@ class TurkishmmluExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "turkishmmlu",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -85,8 +85,6 @@ class TurkishmmluMultipleChoiceExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = str(choices[incorrect_idx]).strip()

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      positive_response = PositiveResponse(model_response=correct)
      negative_response = NegativeResponse(model_response=incorrect)

@@ -126,14 +126,12 @@ class UnscrambleExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "unscramble",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -69,11 +69,11 @@ class VaxxExtractor(LMEvalBenchmarkExtractor):
      incorrect = stance_choices[incorrect_idx]

      # Format as a classification task
-     formatted_question = f"Text: {text}\n\nWhat is the stance towards COVID-19 vaccination?\nA. {incorrect}\nB. {correct}"
+     prompt = f"Text: {text}\n\nWhat is the stance towards COVID-19 vaccination?"
      metadata = {"label": "vaxx"}

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -101,14 +101,14 @@ class WebQSExtractor(LMEvalBenchmarkExtractor):
      if incorrect == correct:
          incorrect += "k"

-     formatted_question = f"Question: {question}\nAnswer:\nA. {incorrect}\nB. {correct}"
+     prompt = f"Question: {question}\nAnswer:"

      metadata = {
          "label": "webquestions",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -85,11 +85,10 @@ class WiCExtractor(LMEvalBenchmarkExtractor):
          )
          return None

-     formatted_question = (
+     prompt = (
          f"Sentence 1: {sentence1}\n"
          f"Sentence 2: {sentence2}\n"
-         f"Question: Is the word '{word}' used in the same way in the two sentences above?\n"
-         "Answer:\nA. Yes\nB. No"
+         f"Question: Is the word '{word}' used in the same way in the two sentences above?"
      )

      correct = "Yes" if label == 1 else "No"
@@ -100,7 +99,7 @@ class WiCExtractor(LMEvalBenchmarkExtractor):
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -86,7 +86,7 @@ class WinograndeExtractor(LMEvalBenchmarkExtractor):
          return None

      question = f"Complete the sentence: {sentence}"
-     formatted_question = f"{question}\nA. {option1}\nB. {option2}"
+     prompt = f"{question}"

      correct = option1 if answer == "1" else option2
      incorrect = option2 if answer == "1" else option1
@@ -96,7 +96,7 @@ class WinograndeExtractor(LMEvalBenchmarkExtractor):
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -126,14 +126,12 @@ class WmdpExtractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "wmdp",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -82,7 +82,7 @@ class WNLIExtractor(LMEvalBenchmarkExtractor):
          )
          return None

-     formatted_question = f"{sentence1}\nQuestion: {sentence2} True or False?\nAnswer:\nA. True\nB. False"
+     prompt = f"{sentence1}\nQuestion: {sentence2} True or False?"

      correct = "True" if label == 1 else "False"
      incorrect = "False" if label == 1 else "True"
@@ -92,7 +92,7 @@ class WNLIExtractor(LMEvalBenchmarkExtractor):
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -85,7 +85,7 @@ class WSCExtractor(LMEvalBenchmarkExtractor):
          )
          return None

-     formatted_question = f"Passage: {text}\nQuestion:In the passage above, does the pronoun \"*{span2_text}*\" refer to \"*{span1_text}*\"?\nA. Yes\nB. No"
+     prompt = f"Passage: {text}\nQuestion: In the passage above, does the pronoun \"{span2_text}\" refer to \"{span1_text}\"?"

      correct = "Yes" if label == 1 else "No"
      incorrect = "No" if label == 1 else "Yes"
@@ -95,7 +95,7 @@ class WSCExtractor(LMEvalBenchmarkExtractor):
      }

      return self._build_pair(
-         question=formatted_question,
+         question=prompt,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,
@@ -151,14 +151,12 @@ class Wsc273Extractor(LMEvalBenchmarkExtractor):
      incorrect_idx = (answer_idx + 1) % len(choices)
      incorrect = choices[incorrect_idx]

-     formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-
      metadata = {
          "label": "wsc273",
      }

      return self._build_pair(
-         question=formatted_question,
+         question=question,
          correct=correct,
          incorrect=incorrect,
          metadata=metadata,