PyPI - wisent - Versions diffs - 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl - Mend

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1020) hide show

wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/polymath.py DELETED Viewed

@@ -1,155 +0,0 @@
-from __future__ import annotations
-from typing import Any, TYPE_CHECKING
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
-from wisent.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor
-from wisent.core.cli_logger import setup_logger, bind
-if TYPE_CHECKING:
-    from lm_eval.api.task import ConfigurableTask
-# Names exported by this module.
-__all__ = ["PolymathExtractor"]
-# Module-level logger obtained from the project's setup_logger helper.
-_LOG = setup_logger(__name__)
-# Polymath task identifiers declared by this module.
-task_names = (
-    "polymath_en_medium",
-    "polymath_zh_medium",
-    "polymath_en_high",
-    "polymath_zh_high",
-)
-class PolymathExtractor(LMEvalBenchmarkExtractor):
-    """
-    Extractor for Polymath benchmark (multilingual math reasoning).
-    Converts task docs into ContrastivePair objects. Three doc layouts are
-    recognised, checked in this order:
-        1. "question" + "choices" (+ "answer"/"answerKey")
-        2. "instruction" + "option_a".."option_d" (+ "answer")
-        3. open-ended: a question-like field plus a target/answer/solution field
-    """
-    evaluator_name = "exact_match"
-    # Field names probed for open-ended docs; the first key present wins.
-    # NOTE(review): "question" was added so docs shaped as question/answer
-    # (which the published Polymath data appears to use) are no longer dropped
-    # -- confirm against the task's actual schema.
-    _OPEN_QUESTION_KEYS = ("query", "prompt", "problem", "question")
-    _OPEN_ANSWER_KEYS = ("target", "answer", "solution")
-    # Lettered option fields, in A-D order.
-    _OPTION_KEYS = ("option_a", "option_b", "option_c", "option_d")
-    def extract_contrastive_pairs(
-        self,
-        lm_eval_task_data: ConfigurableTask,
-        limit: int | None = None,
-        preferred_doc: str | None = None,
-    ) -> list[ContrastivePair]:
-        """
-        Build contrastive pairs from Polymath docs.
-        Args:
-            lm_eval_task_data: lm-eval task instance for Polymath.
-            limit: Optional maximum number of pairs to produce.
-            preferred_doc: Optional preferred document source.
-        Returns:
-            A list of ContrastivePair objects (possibly empty).
-        """
-        log = bind(_LOG, task=getattr(lm_eval_task_data, "NAME", "unknown"))
-        max_items = self._normalize_limit(limit)
-        docs = self.load_docs(lm_eval_task_data, max_items, preferred_doc=preferred_doc)
-        pairs: list[ContrastivePair] = []
-        log.info("Extracting contrastive pairs", extra={"doc_count": len(docs)})
-        for doc in docs:
-            pair = self._extract_pair_from_doc(doc)
-            if pair is None:
-                continue
-            pairs.append(pair)
-            # Stop as soon as the requested number of pairs has been collected.
-            if max_items is not None and len(pairs) >= max_items:
-                break
-        if not pairs:
-            task_name = getattr(lm_eval_task_data, "NAME", type(lm_eval_task_data).__name__)
-            log.warning("No valid Polymath pairs extracted", extra={"task": task_name})
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        """
-        Convert a single Polymath doc into a ContrastivePair, if possible.
-        Returns None when required fields are missing or malformed.
-        """
-        log = bind(_LOG, doc_id=doc.get("id", "unknown"))
-        try:
-            parsed: tuple[str, list[Any], int] | None = None
-            if "question" in doc and "choices" in doc:
-                parsed = self._parse_choices_doc(doc)
-            elif "instruction" in doc and "option_a" in doc:
-                parsed = self._parse_options_doc(doc)
-            elif any(key in doc for key in self._OPEN_QUESTION_KEYS):
-                pair = self._open_ended_pair(doc)
-                if pair is None:
-                    log.debug(
-                        "Skipping doc due to missing/invalid fields",
-                        extra={"doc": doc},
-                    )
-                return pair
-            if parsed is None:
-                log.debug(
-                    "Skipping doc due to missing/invalid fields",
-                    extra={"doc": doc},
-                )
-                return None
-            question, choices, answer_idx = parsed
-            correct = choices[answer_idx]
-            # The neighbouring choice (wrapping around) serves as the negative.
-            incorrect = choices[(answer_idx + 1) % len(choices)]
-            formatted_question = f"Question: {question}\nA. {incorrect}\nB. {correct}"
-            return self._build_pair(
-                question=formatted_question,
-                correct=correct,
-                incorrect=incorrect,
-                metadata={"label": "polymath"},
-            )
-        except Exception as exc:
-            # Best-effort extraction: one malformed doc must not abort the run.
-            log.error("Error extracting pair from doc", exc_info=exc, extra={"doc": doc})
-            return None
-    @staticmethod
-    def _validated(
-        question: str,
-        choices: Any,
-        answer_idx: int | None,
-    ) -> tuple[str, list[Any], int] | None:
-        """Return (question, choices, answer_idx) if usable for a pair, else None."""
-        if not question or not isinstance(choices, (list, tuple)):
-            return None
-        # At least two choices are needed, otherwise the positive and negative
-        # responses would be the same text.
-        if len(choices) < 2 or answer_idx is None:
-            return None
-        if not 0 <= answer_idx < len(choices):
-            return None
-        return question, list(choices), answer_idx
-    def _parse_choices_doc(self, doc: dict[str, Any]) -> tuple[str, list[Any], int] | None:
-        """Parse the "question" + "choices" layout; None when malformed."""
-        question = str(doc.get("question", "")).strip()
-        raw_choices = doc.get("choices", {})
-        # Choices may be a plain list or a dict carrying the list under "text".
-        choices = raw_choices.get("text", []) if isinstance(raw_choices, dict) else raw_choices
-        answer = doc.get("answer", doc.get("answerKey", ""))
-        if isinstance(answer, str):
-            answer = answer.strip()
-        if isinstance(answer, str) and len(answer) == 1 and answer.isalpha():
-            answer_idx = ord(answer.upper()) - ord("A")
-        elif answer:
-            try:
-                answer_idx = int(answer)
-            except (TypeError, ValueError):
-                # Non-numeric, non-letter answer: nothing sensible to index with.
-                return None
-        else:
-            # NOTE(review): a missing/empty answer falls back to the first
-            # choice, as before -- confirm this default is intended.
-            answer_idx = 0
-        return self._validated(question, choices, answer_idx)
-    def _parse_options_doc(self, doc: dict[str, Any]) -> tuple[str, list[Any], int] | None:
-        """Parse the "instruction" + "option_a".."option_d" layout; None when malformed."""
-        question = str(doc.get("instruction", "")).strip()
-        options = [str(doc.get(key, "")).strip() for key in self._OPTION_KEYS]
-        letter = str(doc.get("answer", "A")).strip().upper()
-        if len(letter) != 1:
-            return None
-        raw_idx = ord(letter) - ord("A")
-        if not 0 <= raw_idx < len(options) or not options[raw_idx]:
-            return None
-        # Empty options are dropped, so the answer index is re-based onto the
-        # filtered list: it equals the number of non-empty options before it.
-        answer_idx = sum(1 for option in options[:raw_idx] if option)
-        choices = [option for option in options if option]
-        return self._validated(question, choices, answer_idx)
-    def _open_ended_pair(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        """Build a pair from an open-ended doc; None when question or answer is empty."""
-        question = self._first_text(doc, self._OPEN_QUESTION_KEYS)
-        correct_answer = self._first_text(doc, self._OPEN_ANSWER_KEYS)
-        if not question or not correct_answer:
-            return None
-        return self._build_pair(
-            question=f"Question: {question}",
-            correct=correct_answer,
-            incorrect="incorrect answer",
-            metadata={"label": "polymath"},
-        )
-    @staticmethod
-    def _first_text(doc: dict[str, Any], keys: tuple[str, ...]) -> str:
-        """Return the stripped text of the first key present in doc ("" if none or None)."""
-        for key in keys:
-            if key in doc:
-                value = doc[key]
-                return "" if value is None else str(value).strip()
-        return ""
-    @staticmethod
-    def _build_pair(
-        question: str,
-        correct: str,
-        incorrect: str,
-        metadata: dict[str, Any] | None = None,
-    ) -> ContrastivePair:
-        """
-        Assemble a ContrastivePair from a prompt and its two responses.
-        Args:
-            question: Prompt text for the pair.
-            correct: Text used for the positive response.
-            incorrect: Text used for the negative response.
-            metadata: Optional mapping; only its "label" entry is used.
-        Returns:
-            The constructed ContrastivePair (label is None without metadata).
-        """
-        # metadata defaults to None, so guard before reading the label.
-        label = metadata.get("label") if metadata else None
-        positive_response = PositiveResponse(model_response=correct)
-        negative_response = NegativeResponse(model_response=incorrect)
-        return ContrastivePair(
-            prompt=question,
-            positive_response=positive_response,
-            negative_response=negative_response,
-            label=label,
-        )

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl