PyPI - wisent - Versions diffs - 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl - Mend

wisent 0.7.379py3-none-any.whl → 0.7.901py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (1020) hide show

wisent/core/activations/prompt_construction_strategy.py DELETED Viewed

@@ -1,47 +0,0 @@
-"""Prompt construction strategies for activation collection."""
-from enum import Enum
-class PromptConstructionStrategy(Enum):
-    """
-    Strategies for constructing prompts from question-answer pairs.
-    These strategies determine how the prompt and response are formatted
-    before being passed to the model for activation extraction.
-    """
-    MULTIPLE_CHOICE = "multiple_choice"
-    """
-    Format: Which is better: Q A. bad B. good → "A"/"B" (choice format)
-    Example: "Which is better: What is 2+2? A. 5 B. 4"
-    Response: "A" or "B"
-    """
-    ROLE_PLAYING = "role_playing"
-    """
-    Format: Behave like person who would answer Q with good_resp → "I" (role assumption)
-    Example: "Behave like a person who would answer 'What is 2+2?' with '4'"
-    Response: "I"
-    """
-    DIRECT_COMPLETION = "direct_completion"
-    """
-    Format: Q → good_resp/bad_resp (direct answer)
-    Example: "What is 2+2?"
-    Response: "4" or "5"
-    """
-    INSTRUCTION_FOLLOWING = "instruction_following"
-    """
-    Format: [INST] Q [/INST] → good_resp/bad_resp (instruction format)
-    Example: "[INST] What is 2+2? [/INST]"
-    Response: "4" or "5"
-    """
-    CHAT_TEMPLATE = "chat_template"
-    """
-    Format: Uses the model's built-in chat template
-    Example: <|start_header_id|>user<|end_header_id|>What is 2+2?<|eot_id|>
-    Response: Model's chat-formatted response
-    """

wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/browsecomp.py DELETED Viewed

@@ -1,245 +0,0 @@
-from __future__ import annotations
-from typing import Any
-from wisent.core.cli_logger import setup_logger
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.huggingface_pairs.atoms import HuggingFaceBenchmarkExtractor
-__all__ = ["BrowseCompExtractor"]
-log = setup_logger(__name__)
-class BrowseCompExtractor(HuggingFaceBenchmarkExtractor):
-    """
-    Extractor for BrowseComp - Web Browsing Agent Benchmark (OpenAI 2025).
-    BrowseComp evaluates AI agents' ability to navigate the web and locate
-    hard-to-find information. Contains 1,266 challenging questions that
-    require persistent web searching.
-    Key characteristics:
-    - Questions designed to be extremely difficult
-    - Requires multi-step web navigation
-    - Short, verifiable answers
-    - GPT-4o with browsing achieves only ~2% accuracy
-    Dataset: OpenAI simple-evals / Tevatron/browsecomp-plus
-    For web browsing evaluation:
-    - Positive (correct) = Finds accurate information through proper search
-    - Negative (incorrect) = Provides incorrect or unverified information
-    """
-    # Evaluator that should be used for this benchmark
-    evaluator_name = "web_browsing_accuracy"
-    def __init__(self, variant: str = "standard"):
-        """
-        Initialize BrowseComp extractor.
-        Args:
-            variant: Benchmark variant ("standard", "plus", "zh")
-        """
-        super().__init__()
-        self.variant = variant
-    def extract_contrastive_pairs(
-        self,
-        limit: int | None = None,
-    ) -> list[ContrastivePair]:
-        """
-        Build contrastive pairs from BrowseComp examples.
-        Creates pairs for web browsing evaluation:
-        - Positive (correct) = Accurate answer with proper search
-        - Negative (incorrect) = Incorrect or hallucinated answer
-        Args:
-            limit: Optional maximum number of pairs to produce.
-        Returns:
-            A list of ContrastivePair objects.
-        """
-        max_items = self._normalize_limit(limit)
-        # Try to load from HuggingFace
-        try:
-            if self.variant == "plus":
-                docs = self.load_dataset(
-                    dataset_name="Tevatron/browsecomp-plus",
-                    split="test",
-                    limit=max_items,
-                )
-            else:
-                # BrowseComp standard is primarily from GitHub simple-evals
-                # Use synthetic examples based on the documented structure
-                docs = self._create_browsecomp_examples(max_items or 100)
-            log.info(f"Loaded {len(docs)} examples from BrowseComp ({self.variant})")
-        except Exception as e:
-            log.warning(f"Failed to load BrowseComp: {e}")
-            docs = self._create_browsecomp_examples(max_items or 100)
-        pairs: list[ContrastivePair] = []
-        for doc in docs:
-            pair = self._extract_pair_from_doc(doc)
-            if pair is not None:
-                pairs.append(pair)
-                if max_items is not None and len(pairs) >= max_items:
-                    break
-        if not pairs:
-            log.warning("No valid BrowseComp pairs extracted")
-        return pairs
-    def _create_browsecomp_examples(self, count: int) -> list[dict[str, Any]]:
-        """Create examples based on BrowseComp's documented style."""
-        examples = []
-        # BrowseComp-style questions requiring deep web search
-        browsecomp_questions = [
-            {
-                "question": "What was the exact founding date of the first public library in the state where the inventor of the telephone was born?",
-                "answer": "March 17, 1848",
-                "domain": "history",
-                "difficulty": "hard",
-            },
-            {
-                "question": "What is the name of the CEO's spouse at the company that acquired the startup founded by the person who created the first commercial web browser?",
-                "answer": "Wendy Schmidt",
-                "domain": "technology",
-                "difficulty": "hard",
-            },
-            {
-                "question": "In what year did the architect of the Sydney Opera House win their first major architectural award?",
-                "answer": "1957",
-                "domain": "architecture",
-                "difficulty": "medium",
-            },
-            {
-                "question": "What is the elevation in meters of the highest point in the country where the inventor of dynamite was born?",
-                "answer": "2111",
-                "domain": "geography",
-                "difficulty": "medium",
-            },
-            {
-                "question": "What was the original name of the company that later became the largest advertiser on the platform where the first tweet was posted?",
-                "answer": "Blue Ribbon Sports",
-                "domain": "business",
-                "difficulty": "hard",
-            },
-            {
-                "question": "How many employees did the company have when it went public, the company founded by the person who dropped out of the university where the founder of Facebook also studied?",
-                "answer": "250",
-                "domain": "business",
-                "difficulty": "hard",
-            },
-            {
-                "question": "What is the name of the river that flows through the city where the author of '1984' was born?",
-                "answer": "Irrawaddy",
-                "domain": "geography",
-                "difficulty": "medium",
-            },
-            {
-                "question": "In what month and year did the person who played the main character in the highest-grossing film of 1997 get married for the first time?",
-                "answer": "June 1985",
-                "domain": "entertainment",
-                "difficulty": "hard",
-            },
-            {
-                "question": "What is the atomic number of the element named after the country where the scientist who discovered radioactivity was born?",
-                "answer": "84",
-                "domain": "science",
-                "difficulty": "medium",
-            },
-            {
-                "question": "How many gold medals did the country win in the Olympics held in the city that hosted the World's Fair where the Eiffel Tower was unveiled?",
-                "answer": "42",
-                "domain": "sports",
-                "difficulty": "hard",
-            },
-        ]
-        for i in range(count):
-            q = browsecomp_questions[i % len(browsecomp_questions)].copy()
-            q["question_id"] = f"bc_{i:04d}"
-            examples.append(q)
-        return examples
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        """
-        Convert a single doc into a ContrastivePair.
-        """
-        try:
-            question_id = doc.get("question_id", doc.get("id", ""))
-            question = doc.get("question", doc.get("query", "")).strip()
-            answer = doc.get("answer", doc.get("expected_answer", "")).strip()
-            domain = doc.get("domain", "general")
-            difficulty = doc.get("difficulty", "hard")
-            if not question:
-                log.debug("Skipping: missing question")
-                return None
-            # Build the web search task prompt
-            task_prompt = self._build_search_prompt(question)
-            # Positive = finds correct answer through proper search
-            correct_response = self._create_correct_search_response(question, answer)
-            # Negative = provides incorrect or fabricated answer
-            incorrect_response = self._create_incorrect_search_response(question)
-            metadata = {
-                "label": "browsecomp",
-                "source": "openai/browsecomp",
-                "question_id": question_id,
-                "domain": domain,
-                "difficulty": difficulty,
-                "ground_truth": answer,
-                "is_web_browsing_benchmark": True,
-            }
-            return self._build_pair(
-                question=task_prompt,
-                correct=correct_response,
-                incorrect=incorrect_response,
-                metadata=metadata,
-            )
-        except Exception as exc:
-            log.error(f"Error extracting pair from doc: {exc}", exc_info=True)
-            return None
-    def _build_search_prompt(self, question: str) -> str:
-        """Build the web search task prompt."""
-        return (
-            f"Find the answer to this question by searching the web:\n\n"
-            f"Question: {question}\n\n"
-            "Use web search capabilities to locate accurate, verifiable information. "
-            "Provide a specific, concise answer supported by sources you find."
-        )
-    def _create_correct_search_response(self, question: str, answer: str) -> str:
-        """Create a response with correct search result."""
-        return (
-            f"After searching relevant sources, I found the answer: {answer}\n\n"
-            "This information was verified through multiple reliable sources found "
-            "during my web search. The answer is based on factual information from "
-            "authoritative websites and cross-referenced for accuracy."
-        )
-    def _create_incorrect_search_response(self, question: str) -> str:
-        """Create a response with incorrect or fabricated answer."""
-        return (
-            "Based on my search, I believe the answer is approximately related to "
-            "some general information I found. While I couldn't locate the exact "
-            "answer, based on similar topics I encountered during my search, "
-            "I would estimate the answer involves [fabricated details]. However, "
-            "I'm not entirely certain of this response."
-        )

wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text.py DELETED Viewed

@@ -1,15 +0,0 @@
-from __future__ import annotations
-from wisent.core.contrastive_pairs.huggingface_pairs.hf_task_extractors.codexglue_code_to_text_python import CodexglueCodeToTextPythonExtractor
-__all__ = ["CodexglueCodeToTextExtractor"]
-class CodexglueCodeToTextExtractor(CodexglueCodeToTextPythonExtractor):
-    """
-    Extractor for CodeXGLUE code-to-text benchmark (default: Python).
-    This is an alias for the Python variant. For other languages,
-    use the specific extractors (codexglue_code_to_text_java, etc.)
-    """
-    pass

wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_go.py DELETED Viewed

@@ -1,64 +0,0 @@
-from __future__ import annotations
-from typing import Any
-from wisent.core.cli_logger import setup_logger
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.huggingface_pairs.atoms import HuggingFaceBenchmarkExtractor
-__all__ = ["CodexglueCodeToTextGoExtractor"]
-log = setup_logger(__name__)
-task_names = ("codexglue_code_to_text_go",)
-class CodexglueCodeToTextGoExtractor(HuggingFaceBenchmarkExtractor):
-    """Extractor for CodeXGLUE code-to-text Go."""
-    evaluator_name = "generation"
-    def extract_contrastive_pairs(
-        self,
-        limit: int | None = None,
-    ) -> list[ContrastivePair]:
-        max_items = self._normalize_limit(limit)
-        from datasets import load_dataset
-        try:
-            dataset = load_dataset("code_x_glue_ct_code_to_text", "go", split="test")
-            if max_items:
-                dataset = dataset.select(range(min(max_items, len(dataset))))
-        except Exception as e:
-            log.error(f"Failed to load codexglue_code_to_text_go dataset: {e}")
-            return []
-        pairs: list[ContrastivePair] = []
-        log.info(f"Extracting {len(dataset)} codexglue_code_to_text_go pairs")
-        for doc in dataset:
-            pair = self._extract_pair_from_doc(doc)
-            if pair:
-                pairs.append(pair)
-                if max_items and len(pairs) >= max_items:
-                    break
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        try:
-            code = doc.get("code", "").strip()
-            docstring = doc.get("docstring", "").strip()
-            if not code or not docstring:
-                return None
-            correct = docstring
-            incorrect = "This is an incorrect description."
-            question = f"Describe what this Go code does:\n\n{code}"
-            metadata = {"label": "codexglue_code_to_text_go"}
-            return self._build_pair(question, correct, incorrect, metadata)
-        except Exception as e:
-            log.error(f"Error extracting pair: {e}")
-            return None

wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_java.py DELETED Viewed

@@ -1,65 +0,0 @@
-from __future__ import annotations
-from typing import Any
-from wisent.core.cli_logger import setup_logger
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.huggingface_pairs.atoms import HuggingFaceBenchmarkExtractor
-__all__ = ["CodexglueCodeToTextJavaExtractor"]
-log = setup_logger(__name__)
-task_names = ("codexglue_code_to_text_java",)
-class CodexglueCodeToTextJavaExtractor(HuggingFaceBenchmarkExtractor):
-    """Extractor for CodeXGLUE code-to-text Java."""
-    evaluator_name = "generation"
-    def extract_contrastive_pairs(
-        self,
-        limit: int | None = None,
-    ) -> list[ContrastivePair]:
-        max_items = self._normalize_limit(limit)
-        from datasets import load_dataset
-        try:
-            dataset = load_dataset("code_x_glue_ct_code_to_text", "java", split="test")
-            if max_items:
-                dataset = dataset.select(range(min(max_items, len(dataset))))
-        except Exception as e:
-            log.error(f"Failed to load codexglue_code_to_text_java dataset: {e}")
-            return []
-        pairs: list[ContrastivePair] = []
-        log.info(f"Extracting {len(dataset)} codexglue_code_to_text_java pairs")
-        for doc in dataset:
-            pair = self._extract_pair_from_doc(doc)
-            if pair:
-                pairs.append(pair)
-                if max_items and len(pairs) >= max_items:
-                    break
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        try:
-            code = doc.get("code", "").strip()
-            docstring = doc.get("docstring", "").strip()
-            if not code or not docstring:
-                return None
-            correct = docstring
-            incorrect = "This is an incorrect description."
-            question = f"Describe what this Java code does:\n\n{code}"
-            metadata = {"label": "codexglue_code_to_text_java"}
-            return self._build_pair(question, correct, incorrect, metadata)
-        except Exception as e:
-            log.error(f"Error extracting pair: {e}")
-            return None

wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_javascript.py DELETED Viewed

@@ -1,65 +0,0 @@
-from __future__ import annotations
-from typing import Any
-from wisent.core.cli_logger import setup_logger
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.huggingface_pairs.atoms import HuggingFaceBenchmarkExtractor
-__all__ = ["CodexglueCodeToTextJavascriptExtractor"]
-log = setup_logger(__name__)
-task_names = ("codexglue_code_to_text_javascript",)
-class CodexglueCodeToTextJavascriptExtractor(HuggingFaceBenchmarkExtractor):
-    """Extractor for CodeXGLUE code-to-text JavaScript."""
-    evaluator_name = "generation"
-    def extract_contrastive_pairs(
-        self,
-        limit: int | None = None,
-    ) -> list[ContrastivePair]:
-        max_items = self._normalize_limit(limit)
-        from datasets import load_dataset
-        try:
-            dataset = load_dataset("code_x_glue_ct_code_to_text", "javascript", split="test")
-            if max_items:
-                dataset = dataset.select(range(min(max_items, len(dataset))))
-        except Exception as e:
-            log.error(f"Failed to load codexglue_code_to_text_javascript dataset: {e}")
-            return []
-        pairs: list[ContrastivePair] = []
-        log.info(f"Extracting {len(dataset)} codexglue_code_to_text_javascript pairs")
-        for doc in dataset:
-            pair = self._extract_pair_from_doc(doc)
-            if pair:
-                pairs.append(pair)
-                if max_items and len(pairs) >= max_items:
-                    break
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        try:
-            code = doc.get("code", "").strip()
-            docstring = doc.get("docstring", "").strip()
-            if not code or not docstring:
-                return None
-            correct = docstring
-            incorrect = "This is an incorrect description."
-            question = f"Describe what this JavaScript code does:\n\n{code}"
-            metadata = {"label": "codexglue_code_to_text_javascript"}
-            return self._build_pair(question, correct, incorrect, metadata)
-        except Exception as e:
-            log.error(f"Error extracting pair: {e}")
-            return None

wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_php.py DELETED Viewed

@@ -1,65 +0,0 @@
-from __future__ import annotations
-from typing import Any
-from wisent.core.cli_logger import setup_logger
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.huggingface_pairs.atoms import HuggingFaceBenchmarkExtractor
-__all__ = ["CodexglueCodeToTextPhpExtractor"]
-log = setup_logger(__name__)
-task_names = ("codexglue_code_to_text_php",)
-class CodexglueCodeToTextPhpExtractor(HuggingFaceBenchmarkExtractor):
-    """Extractor for CodeXGLUE code-to-text PHP."""
-    evaluator_name = "generation"
-    def extract_contrastive_pairs(
-        self,
-        limit: int | None = None,
-    ) -> list[ContrastivePair]:
-        max_items = self._normalize_limit(limit)
-        from datasets import load_dataset
-        try:
-            dataset = load_dataset("code_x_glue_ct_code_to_text", "php", split="test")
-            if max_items:
-                dataset = dataset.select(range(min(max_items, len(dataset))))
-        except Exception as e:
-            log.error(f"Failed to load codexglue_code_to_text_php dataset: {e}")
-            return []
-        pairs: list[ContrastivePair] = []
-        log.info(f"Extracting {len(dataset)} codexglue_code_to_text_php pairs")
-        for doc in dataset:
-            pair = self._extract_pair_from_doc(doc)
-            if pair:
-                pairs.append(pair)
-                if max_items and len(pairs) >= max_items:
-                    break
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        try:
-            code = doc.get("code", "").strip()
-            docstring = doc.get("docstring", "").strip()
-            if not code or not docstring:
-                return None
-            correct = docstring
-            incorrect = "This is an incorrect description."
-            question = f"Describe what this PHP code does:\n\n{code}"
-            metadata = {"label": "codexglue_code_to_text_php"}
-            return self._build_pair(question, correct, incorrect, metadata)
-        except Exception as e:
-            log.error(f"Error extracting pair: {e}")
-            return None

wisent/core/contrastive_pairs/huggingface_pairs/hf_task_extractors/codexglue_code_to_text_python.py DELETED Viewed

@@ -1,65 +0,0 @@
-from __future__ import annotations
-from typing import Any
-from wisent.core.cli_logger import setup_logger
-from wisent.core.contrastive_pairs.core.pair import ContrastivePair
-from wisent.core.contrastive_pairs.huggingface_pairs.atoms import HuggingFaceBenchmarkExtractor
-__all__ = ["CodexglueCodeToTextPythonExtractor"]
-log = setup_logger(__name__)
-task_names = ("codexglue_code_to_text_python",)
-class CodexglueCodeToTextPythonExtractor(HuggingFaceBenchmarkExtractor):
-    """Extractor for CodeXGLUE code-to-text Python."""
-    evaluator_name = "generation"
-    def extract_contrastive_pairs(
-        self,
-        limit: int | None = None,
-    ) -> list[ContrastivePair]:
-        max_items = self._normalize_limit(limit)
-        from datasets import load_dataset
-        try:
-            dataset = load_dataset("code_x_glue_ct_code_to_text", "python", split="test")
-            if max_items:
-                dataset = dataset.select(range(min(max_items, len(dataset))))
-        except Exception as e:
-            log.error(f"Failed to load codexglue_code_to_text_python dataset: {e}")
-            return []
-        pairs: list[ContrastivePair] = []
-        log.info(f"Extracting {len(dataset)} codexglue_code_to_text_python pairs")
-        for doc in dataset:
-            pair = self._extract_pair_from_doc(doc)
-            if pair:
-                pairs.append(pair)
-                if max_items and len(pairs) >= max_items:
-                    break
-        return pairs
-    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
-        try:
-            code = doc.get("code", "").strip()
-            docstring = doc.get("docstring", "").strip()
-            if not code or not docstring:
-                return None
-            correct = docstring
-            incorrect = "This is an incorrect description."
-            question = f"Describe what this Python code does:\n\n{code}"
-            metadata = {"label": "codexglue_code_to_text_python"}
-            return self._build_pair(question, correct, incorrect, metadata)
-        except Exception as e:
-            log.error(f"Error extracting pair: {e}")
-            return None

wisent 0.7.379__py3-none-any.whl → 0.7.901__py3-none-any.whl

wisent 0.7.379py3-none-any.whl → 0.7.901py3-none-any.whl