PyPI - wisent - Versions diffs - 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl - Mend

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1020) hide show

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hrm8k.py CHANGED Viewed

@@ -174,14 +174,12 @@ class Hrm8kExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "hrm8k",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/humaneval_infilling.py CHANGED Viewed

@@ -123,14 +123,12 @@ class HumanevalInfillingExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "humaneval_infilling",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/icelandic_winogrande.py CHANGED Viewed

@@ -123,14 +123,12 @@ class IcelandicWinograndeExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "icelandic_winogrande",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse.py CHANGED Viewed

@@ -80,12 +80,10 @@ class InverseExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "inverse"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/inverse_scaling.py CHANGED Viewed

@@ -163,14 +163,12 @@ class InverseScalingExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "inverse_scaling",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/ja.py CHANGED Viewed

@@ -80,12 +80,10 @@ class JaExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "ja"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard.py CHANGED Viewed

@@ -126,14 +126,12 @@ class JapaneseLeaderboardExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "japanese_leaderboard",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/japanese_leaderboard_mc.py CHANGED Viewed

@@ -103,7 +103,7 @@ class JapaneseLeaderboardMultipleChoiceExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\\nA. {incorrect}\\nB. {correct}"
+            prompt = f"Question: {question}"
             positive_response = PositiveResponse(model_response=correct)
             negative_response = NegativeResponse(model_response=incorrect)

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kmmlu.py CHANGED Viewed

@@ -139,14 +139,12 @@ class KmmluExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "kmmlu",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kobest.py CHANGED Viewed

@@ -136,14 +136,12 @@ class KobestExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "kobest",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/kormedmcqa.py CHANGED Viewed

@@ -118,29 +118,17 @@ class KormedmcqaExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            # Format question with all choices
-            formatted_question = (
-                f"{question}\n"
-                f"A. {choices[0]}\n"
-                f"B. {choices[1]}\n"
-                f"C. {choices[2]}\n"
-                f"D. {choices[3]}\n"
-                f"E. {choices[4]}\n"
-                f"정답："
-            )
+            # Raw prompt without MC formatting
+            prompt = question
             metadata = {
                 "label": "kormedmcqa",
             }
-            # The correct answer is the letter (A-E)
-            correct_letter = chr(ord('A') + answer_idx)
-            incorrect_letter = chr(ord('A') + incorrect_idx)
             return self._build_pair(
-                question=formatted_question,
-                correct=correct_letter,
-                incorrect=incorrect_letter,
+                question=prompt,
+                correct=correct,
+                incorrect=incorrect,
                 metadata=metadata,
             )

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_cloze.py CHANGED Viewed

@@ -156,14 +156,12 @@ class LambadaClozeExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "lambada_cloze",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lambada_multilingual.py CHANGED Viewed

@@ -156,14 +156,12 @@ class LambadaMultilingualExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "lambada_multilingual",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/law.py CHANGED Viewed

@@ -80,12 +80,10 @@ class LawExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "law"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/leaderboard.py CHANGED Viewed

@@ -165,14 +165,12 @@ class LeaderboardExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "leaderboard",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lingoly.py CHANGED Viewed

@@ -174,14 +174,12 @@ class LingolyExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "lingoly",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/llama3.py CHANGED Viewed

@@ -123,14 +123,12 @@ class Llama3Extractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "llama3",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/lm_syneval.py CHANGED Viewed

@@ -123,14 +123,12 @@ class LmSynevalExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "lm_syneval",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py CHANGED Viewed

@@ -86,14 +86,14 @@ class LogiQAExtractor(LMEvalBenchmarkExtractor):
             incorrect = options[(label_idx+1)%len(options)]
             question = f"{question}"
-            formatted_question = f"Passage: {context}\nQuestion: {question}\nA. {incorrect}\nB. {correct}"
+            prompt = f"Passage: {context}\nQuestion: {question}"
             metadata = {
                 "label": "logiqa",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=prompt,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py CHANGED Viewed

@@ -85,14 +85,14 @@ class LogiQA2Extractor(LMEvalBenchmarkExtractor):
             incorrect = options[(answer+1)%len(options)]
             question = f"{question}"
-            formatted_question = f"Passage: {text}\nQuestion: {question}\nA. {incorrect}\nB. {correct}"
+            prompt = f"Passage: {text}\nQuestion: {question}"
             metadata = {
                 "label": "logiqa2",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=prompt,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbench.py CHANGED Viewed

@@ -123,14 +123,12 @@ class LongbenchExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "longbench",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/longbenchv2.py CHANGED Viewed

@@ -123,14 +123,12 @@ class Longbenchv2Extractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "longbenchv2",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mastermind.py CHANGED Viewed

@@ -115,7 +115,7 @@ class MastermindExtractor(LMEvalBenchmarkExtractor):
                     }
                     return self._build_pair(
-                        question=formatted_question,
+                        question=question,
                         correct=correct,
                         incorrect=incorrect,
                         metadata=metadata,
@@ -174,14 +174,12 @@ class MastermindExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "mastermind",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mbpp.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import re
 from typing import Any, TYPE_CHECKING
 from wisent.core.contrastive_pairs.core.pair import ContrastivePair
@@ -16,14 +17,16 @@ _LOG = setup_logger(__name__)
 task_names = (
     "mbpp",
+    "mbpp_instruct",
     "mbpp_plus",
+    "mbpp_plus_instruct",
 )
 class MBPPExtractor(LMEvalBenchmarkExtractor):
     """Extractor for the MBPP (Mostly Basic Python Problems) benchmark."""
-    evaluator_name = "exact_match"
+    evaluator_name = "coding"
     def extract_contrastive_pairs(
         self,
         lm_eval_task_data: ConfigurableTask,
@@ -57,8 +60,9 @@ class MBPPExtractor(LMEvalBenchmarkExtractor):
         log.info("Extracting contrastive pairs", extra={"doc_count": len(docs)})
+        task_name = getattr(lm_eval_task_data, "NAME", "mbpp")
         for doc in docs:
-            pair = self._extract_pair_from_doc(doc)
+            pair = self._extract_pair_from_doc(doc, task_name)
             if pair is not None:
                 pairs.append(pair)
                 if max_items is not None and len(pairs) >= max_items:
@@ -70,7 +74,7 @@ class MBPPExtractor(LMEvalBenchmarkExtractor):
         return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
+    def _extract_pair_from_doc(self, doc: dict[str, Any], task_name: str) -> ContrastivePair | None:
         """
         Convert a single MBPP doc into a ContrastivePair, if possible.
         Returns None when required fields are missing or malformed.
@@ -80,6 +84,7 @@ class MBPPExtractor(LMEvalBenchmarkExtractor):
         try:
             text = str(doc.get("text", "")).strip()
             code = str(doc.get("code", "")).strip()
+            test_list = doc.get("test_list", [])
             if not text or not code:
                 log.debug(
@@ -94,10 +99,40 @@ class MBPPExtractor(LMEvalBenchmarkExtractor):
             # Incorrect solution: return a placeholder or buggy implementation
             incorrect = "    return None  # Incomplete implementation"
-            formatted_question = f"Write a Python function to solve this problem:\n\n{text}"
+            # Format tests (use first 3 if available)
+            tests_str = "\n".join(test_list[:3]) if test_list else ""
+            # Different prompt format for instruct vs base
+            is_instruct = "instruct" in task_name.lower()
+            if is_instruct:
+                formatted_question = f"You are an expert Python programmer, and here is your task:\n{text}\nYour code should pass these tests:\n{tests_str}"
+            else:
+                formatted_question = f"You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n{tests_str}\n[BEGIN]\n"
+            # Extract entry_point (function name) from first test assertion
+            entry_point = None
+            if test_list:
+                match = re.search(r'assert\s+(\w+)\(', test_list[0])
+                entry_point = match.group(1) if match else None
+            # Format test_code with check() function (like HumanEval format)
+            # Replace function name with 'candidate' in assertions
+            if test_list and entry_point:
+                # Convert "assert func_name(...)" to "assert candidate(...)"
+                converted_tests = [
+                    re.sub(rf'\b{entry_point}\b', 'candidate', test)
+                    for test in test_list
+                ]
+                test_code = f"def check(candidate):\n    " + "\n    ".join(converted_tests)
+            else:
+                test_code = ""
             metadata = {
-                "label": "mbpp",
+                "label": task_name,
+                "entry_point": entry_point,
+                "test_code": test_code,
+                "language": "python",
+                "task_name": task_name,
             }
             return self._build_pair(
@@ -120,4 +155,10 @@ class MBPPExtractor(LMEvalBenchmarkExtractor):
     ) -> ContrastivePair:
         positive_response = PositiveResponse(model_response=correct)
         negative_response = NegativeResponse(model_response=incorrect)
-        return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))
+        return ContrastivePair(
+            prompt=question,
+            positive_response=positive_response,
+            negative_response=negative_response,
+            label=metadata.get("label") if metadata else None,
+            metadata=metadata,
+        )

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py CHANGED Viewed

@@ -83,7 +83,7 @@ class MCTACOExtractor(LMEvalBenchmarkExtractor):
                 )
                 return None
-            formatted_question = f"{sentence}\nQuestion: {question}\nAnswer: {answer}\nPlausible:\nA. Yes\nB. No"
+            prompt = f"{sentence}\nQuestion: {question}\nAnswer: {answer}\nPlausible?"
             correct = "Yes" if label == 1 else "No"
             incorrect = "No" if label == 1 else "Yes"
@@ -93,7 +93,7 @@ class MCTACOExtractor(LMEvalBenchmarkExtractor):
             }
             return self._build_pair(
-                question=formatted_question,
+                question=prompt,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/med_concepts_qa.py CHANGED Viewed

@@ -131,7 +131,7 @@ class MedConceptsQaExtractor(LMEvalBenchmarkExtractor):
                 # For this format, the response should be just the letter
                 return self._build_pair(
-                    question=formatted_question,
+                    question=question,
                     correct=answer_key,
                     incorrect=chr(ord('A') + incorrect_idx),
                     metadata=metadata,
@@ -195,14 +195,12 @@ class MedConceptsQaExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "med_concepts_qa",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/meddialog.py CHANGED Viewed

@@ -151,14 +151,12 @@ class MeddialogExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "meddialog",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medical.py CHANGED Viewed

@@ -80,12 +80,10 @@ class MedicalExtractor(LMEvalBenchmarkExtractor):
             correct = str(choices[answer_idx]).strip()
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {"label": "medical"}
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medmcqa.py CHANGED Viewed

@@ -140,14 +140,12 @@ class MedmcqaExtractor(LMEvalBenchmarkExtractor):
             incorrect_idx = (answer_idx + 1) % len(choices)
             incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
             metadata = {
                 "label": "medmcqa",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=question,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py CHANGED Viewed

@@ -89,14 +89,14 @@ class MedQAExtractor(LMEvalBenchmarkExtractor):
             correct = endings[label]
             incorrect = endings[(label + 1) % 4]
-            formatted_question = f"Question: {sent1}\nA. {incorrect}\nB. {correct}"
+            prompt = f"Question: {sent1}"
             metadata = {
                 "label": "medqa",
             }
             return self._build_pair(
-                question=formatted_question,
+                question=prompt,
                 correct=correct,
                 incorrect=incorrect,
                 metadata=metadata,

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mela.py CHANGED Viewed

@@ -67,11 +67,11 @@ class MelaExtractor(LMEvalBenchmarkExtractor):
                 incorrect_idx = 1 - answer_idx
                 incorrect = choices[incorrect_idx]
-                formatted_question = f"Sentence: {sentence}\nDetermine whether this sentence is acceptable or unacceptable?\nA. {incorrect}\nB. {correct}"
+                prompt = f"Sentence: {sentence}\nDetermine whether this sentence is acceptable or unacceptable?"
                 metadata = {"label": "mela"}
                 return self._build_pair(
-                    question=formatted_question,
+                    question=prompt,
                     correct=correct,
                     incorrect=incorrect,
                     metadata=metadata,

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl