PyPI - wisent - Versions diffs - 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl - Mend

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1020) hide show

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/agieval.py DELETED Viewed

@@ -1,155 +0,0 @@
-from __future__ import annotations
-from typing import Any, TYPE_CHECKING
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
-from wisent.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor
-from wisent.core.cli_logger import setup_logger, bind
-if TYPE_CHECKING:
-    from lm_eval.api.task import ConfigurableTask
-__all__ = ["AgievalExtractor"]
-_LOG = setup_logger(__name__)
-task_names = ("agieval",)
-class AgievalExtractor(LMEvalBenchmarkExtractor):
-    """Extractor for the Agieval benchmark."""
-    evaluator_name = "exact_match"
-    def extract_contrastive_pairs(
-        self,
-        lm_eval_task_data: ConfigurableTask,
-        limit: int | None = None,
-        preferred_doc: str | None = None,
-    ) -> list[ContrastivePair]:
-        """
-        Build contrastive pairs from Agieval docs.
-        Args:
-            lm_eval_task_data: lm-eval task instance for Agieval.
-            limit: Optional maximum number of pairs to produce.
-            preferred_doc: Optional preferred document source.
-        Returns:
-            A list of ContrastivePair objects.
-        """
-        log = bind(_LOG, task=getattr(lm_eval_task_data, "NAME", "unknown"))
-        max_items = self._normalize_limit(limit)
-        docs = self.load_docs(lm_eval_task_data, max_items, preferred_doc=preferred_doc)
-        pairs: list[ContrastivePair] = []
-        log.info("Extracting contrastive pairs", extra={"doc_count": len(docs)})
-        for doc in docs:
-            pair = self._extract_pair_from_doc(doc)
-            if pair is not None:
-                pairs.append(pair)
-                if max_items is not None and len(pairs) >= max_items:
-                    break
-        if not pairs:
-            task_name = getattr(lm_eval_task_data, "NAME", type(lm_eval_task_data).__name__)
-            log.warning("No valid Agieval pairs extracted", extra={"task": task_name})
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        """
-        Convert a single Agieval doc into a ContrastivePair, if possible.
-        Returns None when required fields are missing or malformed.
-        """
-        log = bind(_LOG, doc_id=doc.get("id", "unknown"))
-        try:
-            # Try multiple possible schema formats
-            question = None
-            choices = None
-            answer_idx = None
-            # Format 1: question + choices + answer
-            if "question" in doc and "choices" in doc:
-                question = str(doc.get("question", "")).strip()
-                choices_data = doc.get("choices", {})
-                if isinstance(choices_data, dict):
-                    choices = choices_data.get("text", [])
-                elif isinstance(choices_data, list):
-                    choices = choices_data
-                answer = doc.get("answer", doc.get("answerKey", ""))
-                if isinstance(answer, str) and len(answer) == 1 and answer.isalpha():
-                    answer_idx = ord(answer.upper()) - ord('A')
-                else:
-                    answer_idx = int(answer) if answer else 0
-            # Format 2: instruction + option_a/b/c/d + answer (MMMLU style)
-            elif "instruction" in doc and "option_a" in doc:
-                question = str(doc.get("instruction", "")).strip()
-                choices = [
-                    str(doc.get("option_a", "")).strip(),
-                    str(doc.get("option_b", "")).strip(),
-                    str(doc.get("option_c", "")).strip(),
-                    str(doc.get("option_d", "")).strip(),
-                ]
-                choices = [c for c in choices if c]
-                answer = doc.get("answer", "A")
-                answer_idx = ord(str(answer).upper()) - ord('A')
-            # Format 3: query/prompt + answer
-            elif "query" in doc or "prompt" in doc:
-                question = str(doc.get("query", doc.get("prompt", ""))).strip()
-                # For open-ended questions, use target as correct answer
-                correct_answer = str(doc.get("target", doc.get("answer", ""))).strip()
-                if correct_answer:
-                    metadata = {"label": "agieval"}
-                    return self._build_pair(
-                        question=f"Question: {question}",
-                        correct=correct_answer,
-                        incorrect="incorrect answer",
-                        metadata=metadata,
-                    )
-                return None
-            if not question or not choices or answer_idx is None or not (0 <= answer_idx < len(choices)):
-                log.debug(
-                    "Skipping doc due to missing/invalid fields",
-                    extra={"doc": doc},
-                )
-                return None
-            correct = choices[answer_idx]
-            incorrect_idx = (answer_idx + 1) % len(choices)
-            incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-            metadata = {
-                "label": "agieval",
-            }
-            return self._build_pair(
-                question=formatted_question,
-                correct=correct,
-                incorrect=incorrect,
-                metadata=metadata,
-            )
-        except Exception as exc:
-            log.error("Error extracting pair from doc", exc_info=exc, extra={"doc": doc})
-            return None
-    @staticmethod
-    def _build_pair(
-        question: str,
-        correct: str,
-        incorrect: str,
-        metadata: dict[str, Any] | None = None,
-    ) -> ContrastivePair:
-        positive_response = PositiveResponse(model_response=correct)
-        negative_response = NegativeResponse(model_response=incorrect)
-        return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/code2text.py DELETED Viewed

@@ -1,161 +0,0 @@
-from __future__ import annotations
-from typing import Any, TYPE_CHECKING
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
-from wisent.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor
-from wisent.core.cli_logger import setup_logger, bind
-if TYPE_CHECKING:
-    from lm_eval.api.task import ConfigurableTask
-__all__ = ["Code2textExtractor"]
-_LOG = setup_logger(__name__)
-task_names = (
-    "code2text_go",
-    "code2text_java",
-    "code2text_javascript",
-    "code2text_php",
-    "code2text_python",
-    "code2text_ruby",
-)
-class Code2textExtractor(LMEvalBenchmarkExtractor):
-    """Extractor for Code2Text benchmark."""
-    evaluator_name = "generation"
-    def extract_contrastive_pairs(
-        self,
-        lm_eval_task_data: ConfigurableTask,
-        limit: int | None = None,
-        preferred_doc: str | None = None,
-    ) -> list[ContrastivePair]:
-        log = bind(_LOG, task=getattr(lm_eval_task_data, "NAME", "unknown"))
-        max_items = self._normalize_limit(limit)
-        docs = self.load_docs(lm_eval_task_data, max_items, preferred_doc=preferred_doc)
-        pairs: list[ContrastivePair] = []
-        log.info("Extracting contrastive pairs", extra={"doc_count": len(docs)})
-        for doc in docs:
-            pair = self._extract_pair_from_doc(doc)
-            if pair is not None:
-                pairs.append(pair)
-                if max_items is not None and len(pairs) >= max_items:
-                    break
-        if not pairs:
-            task_name = getattr(lm_eval_task_data, "NAME", type(lm_eval_task_data).__name__)
-            log.warning("No valid pairs extracted", extra={"task": task_name})
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        log = bind(_LOG, doc_id=doc.get("id", "unknown"))
-        try:
-            # Format 1: Code2Text format (code_tokens + docstring_tokens)
-            if "code_tokens" in doc and "docstring_tokens" in doc:
-                code_tokens = doc.get("code_tokens", [])
-                docstring_tokens = doc.get("docstring_tokens", [])
-                if not code_tokens or not docstring_tokens:
-                    log.debug("Skipping doc - empty code or docstring tokens", extra={"doc": doc})
-                    return None
-                # Join tokens to create code and docstring
-                code = " ".join(code_tokens).replace("\n", " ")
-                code = " ".join(code.strip().split())
-                docstring = " ".join(docstring_tokens).replace("\n", " ")
-                docstring = " ".join(docstring.strip().split())
-                # Create synthetic negative by shuffling words in the docstring
-                import random
-                words = docstring.split()
-                if len(words) > 3:
-                    shuffled_words = words.copy()
-                    random.shuffle(shuffled_words)
-                    # If shuffle resulted in same order, reverse it
-                    if shuffled_words == words:
-                        shuffled_words = list(reversed(words))
-                    incorrect_docstring = " ".join(shuffled_words)
-                else:
-                    # For very short docstrings, just use a generic incorrect one
-                    incorrect_docstring = "This is an incorrect docstring."
-                prompt = f"Generate documentation for this code:\n\n{code}\n\nDocumentation:"
-                metadata = {"label": "code2text"}
-                return self._build_pair(
-                    question=prompt,
-                    correct=docstring,
-                    incorrect=incorrect_docstring,
-                    metadata=metadata,
-                )
-            # Format 2: Multiple choice format (fallback)
-            # Try multiple format patterns for question
-            question = doc.get("question", doc.get("query", doc.get("input", doc.get("instruction", doc.get("prompt", ""))))).strip()
-            # Try multiple format patterns for choices
-            choices = doc.get("choices", doc.get("options", doc.get("answers", [])))
-            # Handle option_a/b/c/d format
-            if not choices and "option_a" in doc:
-                choices = [
-                    str(doc.get("option_a", "")).strip(),
-                    str(doc.get("option_b", "")).strip(),
-                    str(doc.get("option_c", "")).strip(),
-                    str(doc.get("option_d", "")).strip(),
-                ]
-                choices = [c for c in choices if c]
-            # Try multiple format patterns for answer
-            answer = doc.get("answer", doc.get("label", doc.get("target", None)))
-            if isinstance(answer, str) and len(answer) == 1 and answer.isalpha():
-                answer_idx = ord(answer.upper()) - ord('A')
-            elif isinstance(answer, int):
-                answer_idx = answer
-            else:
-                return None
-            if not question or not choices or not (0 <= answer_idx < len(choices)):
-                log.debug("Skipping doc due to missing/invalid fields", extra={"doc": doc})
-                return None
-            correct = str(choices[answer_idx]).strip()
-            incorrect_idx = (answer_idx + 1) % len(choices)
-            incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-            metadata = {"label": "code2text"}
-            return self._build_pair(
-                question=formatted_question,
-                correct=correct,
-                incorrect=incorrect,
-                metadata=metadata,
-            )
-        except Exception as exc:
-            log.error("Error extracting pair from doc", exc_info=exc, extra={"doc": doc})
-            return None
-    @staticmethod
-    def _build_pair(
-        question: str,
-        correct: str,
-        incorrect: str,
-        metadata: dict[str, Any] | None = None,
-    ) -> ContrastivePair:
-        positive_response = PositiveResponse(model_response=correct)
-        negative_response = NegativeResponse(model_response=incorrect)
-        return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/codexglue.py DELETED Viewed

@@ -1,107 +0,0 @@
-from __future__ import annotations
-from typing import Any, TYPE_CHECKING
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
-from wisent.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor
-from wisent.core.cli_logger import setup_logger, bind
-if TYPE_CHECKING:
-    from lm_eval.api.task import ConfigurableTask
-__all__ = ["CodexglueExtractor"]
-_LOG = setup_logger(__name__)
-class CodexglueExtractor(LMEvalBenchmarkExtractor):
-    """Extractor for Codexglue benchmark."""
-    def extract_contrastive_pairs(
-        self,
-        lm_eval_task_data: ConfigurableTask,
-        limit: int | None = None,
-        preferred_doc: str | None = None,
-    ) -> list[ContrastivePair]:
-        log = bind(_LOG, task=getattr(lm_eval_task_data, "NAME", "unknown"))
-        max_items = self._normalize_limit(limit)
-        docs = self.load_docs(lm_eval_task_data, max_items, preferred_doc=preferred_doc)
-        pairs: list[ContrastivePair] = []
-        log.info("Extracting contrastive pairs", extra={"doc_count": len(docs)})
-        for doc in docs:
-            pair = self._extract_pair_from_doc(doc)
-            if pair is not None:
-                pairs.append(pair)
-                if max_items is not None and len(pairs) >= max_items:
-                    break
-        if not pairs:
-            task_name = getattr(lm_eval_task_data, "NAME", type(lm_eval_task_data).__name__)
-            log.warning("No valid pairs extracted", extra={"task": task_name})
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        log = bind(_LOG, doc_id=doc.get("id", "unknown"))
-        try:
-            # Try multiple format patterns for question
-            question = doc.get("question", doc.get("query", doc.get("input", doc.get("instruction", doc.get("prompt", ""))))).strip()
-            # Try multiple format patterns for choices
-            choices = doc.get("choices", doc.get("options", doc.get("answers", [])))
-            # Handle option_a/b/c/d format
-            if not choices and "option_a" in doc:
-                choices = [
-                    str(doc.get("option_a", "")).strip(),
-                    str(doc.get("option_b", "")).strip(),
-                    str(doc.get("option_c", "")).strip(),
-                    str(doc.get("option_d", "")).strip(),
-                ]
-                choices = [c for c in choices if c]
-            # Try multiple format patterns for answer
-            answer = doc.get("answer", doc.get("label", doc.get("target", None)))
-            if isinstance(answer, str) and len(answer) == 1 and answer.isalpha():
-                answer_idx = ord(answer.upper()) - ord('A')
-            elif isinstance(answer, int):
-                answer_idx = answer
-            else:
-                return None
-            if not question or not choices or not (0 <= answer_idx < len(choices)):
-                log.debug("Skipping doc due to missing/invalid fields", extra={"doc": doc})
-                return None
-            correct = str(choices[answer_idx]).strip()
-            incorrect_idx = (answer_idx + 1) % len(choices)
-            incorrect = str(choices[incorrect_idx]).strip()
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-            metadata = {"label": "codexglue"}
-            return self._build_pair(
-                question=formatted_question,
-                correct=correct,
-                incorrect=incorrect,
-                metadata=metadata,
-            )
-        except Exception as exc:
-            log.error("Error extracting pair from doc", exc_info=exc, extra={"doc": doc})
-            return None
-    @staticmethod
-    def _build_pair(
-        question: str,
-        correct: str,
-        incorrect: str,
-        metadata: dict[str, Any] | None = None,
-    ) -> ContrastivePair:
-        positive_response = PositiveResponse(model_response=correct)
-        negative_response = NegativeResponse(model_response=incorrect)
-        return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livemathbench.py DELETED Viewed

@@ -1,155 +0,0 @@
-from __future__ import annotations
-from typing import Any, TYPE_CHECKING
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
-from wisent.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor
-from wisent.core.cli_logger import setup_logger, bind
-if TYPE_CHECKING:
-    from lm_eval.api.task import ConfigurableTask
-__all__ = ["LivemathbenchExtractor"]
-_LOG = setup_logger(__name__)
-task_names = ("livemathbench", "livemathbench_cnmo_en", "livemathbench_cnmo_zh")
-class LivemathbenchExtractor(LMEvalBenchmarkExtractor):
-    """Extractor for Livemathbench benchmark (live math problems)."""
-    evaluator_name = "exact_match"
-    def extract_contrastive_pairs(
-        self,
-        lm_eval_task_data: ConfigurableTask,
-        limit: int | None = None,
-        preferred_doc: str | None = None,
-    ) -> list[ContrastivePair]:
-        """
-        Build contrastive pairs from Livemathbench docs.
-        Args:
-            lm_eval_task_data: lm-eval task instance for Livemathbench.
-            limit: Optional maximum number of pairs to produce.
-            preferred_doc: Optional preferred document source.
-        Returns:
-            A list of ContrastivePair objects.
-        """
-        log = bind(_LOG, task=getattr(lm_eval_task_data, "NAME", "unknown"))
-        max_items = self._normalize_limit(limit)
-        docs = self.load_docs(lm_eval_task_data, max_items, preferred_doc=preferred_doc)
-        pairs: list[ContrastivePair] = []
-        log.info("Extracting contrastive pairs", extra={"doc_count": len(docs)})
-        for doc in docs:
-            pair = self._extract_pair_from_doc(doc)
-            if pair is not None:
-                pairs.append(pair)
-                if max_items is not None and len(pairs) >= max_items:
-                    break
-        if not pairs:
-            task_name = getattr(lm_eval_task_data, "NAME", type(lm_eval_task_data).__name__)
-            log.warning("No valid Livemathbench pairs extracted", extra={"task": task_name})
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        """
-        Convert a single Livemathbench doc into a ContrastivePair, if possible.
-        Returns None when required fields are missing or malformed.
-        """
-        log = bind(_LOG, doc_id=doc.get("id", "unknown"))
-        try:
-            # Try multiple possible schema formats
-            question = None
-            choices = None
-            answer_idx = None
-            # Format 1: question + choices + answer
-            if "question" in doc and "choices" in doc:
-                question = str(doc.get("question", "")).strip()
-                choices_data = doc.get("choices", {})
-                if isinstance(choices_data, dict):
-                    choices = choices_data.get("text", [])
-                elif isinstance(choices_data, list):
-                    choices = choices_data
-                answer = doc.get("answer", doc.get("answerKey", ""))
-                if isinstance(answer, str) and len(answer) == 1 and answer.isalpha():
-                    answer_idx = ord(answer.upper()) - ord('A')
-                else:
-                    answer_idx = int(answer) if answer else 0
-            # Format 2: instruction + option_a/b/c/d + answer
-            elif "instruction" in doc and "option_a" in doc:
-                question = str(doc.get("instruction", "")).strip()
-                choices = [
-                    str(doc.get("option_a", "")).strip(),
-                    str(doc.get("option_b", "")).strip(),
-                    str(doc.get("option_c", "")).strip(),
-                    str(doc.get("option_d", "")).strip(),
-                ]
-                choices = [c for c in choices if c]
-                answer = doc.get("answer", "A")
-                answer_idx = ord(str(answer).upper()) - ord('A')
-            # Format 3: query/prompt/problem + answer (common for math)
-            elif "query" in doc or "prompt" in doc or "problem" in doc:
-                question = str(doc.get("query", doc.get("prompt", doc.get("problem", "")))).strip()
-                # For open-ended questions, use target as correct answer
-                correct_answer = str(doc.get("target", doc.get("answer", doc.get("solution", "")))).strip()
-                if correct_answer:
-                    metadata = {"label": "livemathbench"}
-                    return self._build_pair(
-                        question=f"Question: {question}",
-                        correct=correct_answer,
-                        incorrect="incorrect answer",
-                        metadata=metadata,
-                    )
-                return None
-            if not question or not choices or answer_idx is None or not (0 <= answer_idx < len(choices)):
-                log.debug(
-                    "Skipping doc due to missing/invalid fields",
-                    extra={"doc": doc},
-                )
-                return None
-            correct = choices[answer_idx]
-            incorrect_idx = (answer_idx + 1) % len(choices)
-            incorrect = choices[incorrect_idx]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-            metadata = {
-                "label": "livemathbench",
-            }
-            return self._build_pair(
-                question=formatted_question,
-                correct=correct,
-                incorrect=incorrect,
-                metadata=metadata,
-            )
-        except Exception as exc:
-            log.error("Error extracting pair from doc", exc_info=exc, extra={"doc": doc})
-            return None
-    @staticmethod
-    def _build_pair(
-        question: str,
-        correct: str,
-        incorrect: str,
-        metadata: dict[str, Any] | None = None,
-    ) -> ContrastivePair:
-        positive_response = PositiveResponse(model_response=correct)
-        negative_response = NegativeResponse(model_response=incorrect)
-        return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl