wisent 0.1.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic.
- wisent/__init__.py +1 -8
- wisent/benchmarks/__init__.py +0 -0
- wisent/benchmarks/coding/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
- wisent/benchmarks/coding/metrics/evaluator.py +275 -0
- wisent/benchmarks/coding/metrics/passk.py +66 -0
- wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
- wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
- wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
- wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
- wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
- wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
- wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
- wisent/benchmarks/coding/providers/__init__.py +18 -0
- wisent/benchmarks/coding/providers/core/__init__.py +0 -0
- wisent/benchmarks/coding/providers/core/atoms.py +31 -0
- wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
- wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
- wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
- wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
- wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
- wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
- wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
- wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
- wisent/classifiers/__init__.py +0 -0
- wisent/classifiers/core/__init__.py +0 -0
- wisent/classifiers/core/atoms.py +747 -0
- wisent/classifiers/models/__init__.py +0 -0
- wisent/classifiers/models/logistic.py +29 -0
- wisent/classifiers/models/mlp.py +47 -0
- wisent/cli/__init__.py +0 -0
- wisent/cli/classifiers/__init__.py +0 -0
- wisent/cli/classifiers/classifier_rotator.py +137 -0
- wisent/cli/cli_logger.py +142 -0
- wisent/cli/data_loaders/__init__.py +0 -0
- wisent/cli/data_loaders/data_loader_rotator.py +96 -0
- wisent/cli/evaluators/__init__.py +0 -0
- wisent/cli/evaluators/evaluator_rotator.py +148 -0
- wisent/cli/steering_methods/__init__.py +0 -0
- wisent/cli/steering_methods/steering_rotator.py +110 -0
- wisent/cli/wisent_cli/__init__.py +0 -0
- wisent/cli/wisent_cli/commands/__init__.py +0 -0
- wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
- wisent/cli/wisent_cli/commands/listing.py +154 -0
- wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
- wisent/cli/wisent_cli/main.py +93 -0
- wisent/cli/wisent_cli/shell.py +80 -0
- wisent/cli/wisent_cli/ui.py +69 -0
- wisent/cli/wisent_cli/util/__init__.py +0 -0
- wisent/cli/wisent_cli/util/aggregations.py +43 -0
- wisent/cli/wisent_cli/util/parsing.py +126 -0
- wisent/cli/wisent_cli/version.py +4 -0
- wisent/core/__init__.py +27 -0
- wisent/core/activations/__init__.py +0 -0
- wisent/core/activations/activations_collector.py +338 -0
- wisent/core/activations/core/__init__.py +0 -0
- wisent/core/activations/core/atoms.py +216 -0
- wisent/core/agent/__init__.py +18 -0
- wisent/core/agent/budget.py +638 -0
- wisent/core/agent/device_benchmarks.py +685 -0
- wisent/core/agent/diagnose/__init__.py +55 -0
- wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
- wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
- wisent/core/agent/diagnose/create_classifier.py +1154 -0
- wisent/core/agent/diagnose/response_diagnostics.py +268 -0
- wisent/core/agent/diagnose/select_classifiers.py +506 -0
- wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
- wisent/core/agent/diagnose/tasks/__init__.py +33 -0
- wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
- wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
- wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
- wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
- wisent/core/agent/diagnose.py +242 -0
- wisent/core/agent/steer.py +212 -0
- wisent/core/agent/timeout.py +134 -0
- wisent/core/autonomous_agent.py +1234 -0
- wisent/core/bigcode_integration.py +583 -0
- wisent/core/contrastive_pairs/__init__.py +15 -0
- wisent/core/contrastive_pairs/core/__init__.py +0 -0
- wisent/core/contrastive_pairs/core/atoms.py +45 -0
- wisent/core/contrastive_pairs/core/buliders.py +59 -0
- wisent/core/contrastive_pairs/core/pair.py +178 -0
- wisent/core/contrastive_pairs/core/response.py +152 -0
- wisent/core/contrastive_pairs/core/serialization.py +300 -0
- wisent/core/contrastive_pairs/core/set.py +133 -0
- wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
- wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
- wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
- wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
- wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
- wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
- wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
- wisent/core/data_loaders/__init__.py +0 -0
- wisent/core/data_loaders/core/__init__.py +0 -0
- wisent/core/data_loaders/core/atoms.py +98 -0
- wisent/core/data_loaders/loaders/__init__.py +0 -0
- wisent/core/data_loaders/loaders/custom.py +120 -0
- wisent/core/data_loaders/loaders/lm_loader.py +218 -0
- wisent/core/detection_handling.py +257 -0
- wisent/core/download_full_benchmarks.py +1386 -0
- wisent/core/evaluators/__init__.py +0 -0
- wisent/core/evaluators/oracles/__init__.py +0 -0
- wisent/core/evaluators/oracles/interactive.py +73 -0
- wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
- wisent/core/evaluators/oracles/user_specified.py +67 -0
- wisent/core/hyperparameter_optimizer.py +429 -0
- wisent/core/lm_eval_harness_ground_truth.py +1396 -0
- wisent/core/log_likelihoods_evaluator.py +321 -0
- wisent/core/managed_cached_benchmarks.py +595 -0
- wisent/core/mixed_benchmark_sampler.py +364 -0
- wisent/core/model_config_manager.py +330 -0
- wisent/core/model_persistence.py +317 -0
- wisent/core/models/__init__.py +0 -0
- wisent/core/models/core/__init__.py +0 -0
- wisent/core/models/core/atoms.py +460 -0
- wisent/core/models/wisent_model.py +727 -0
- wisent/core/multi_steering.py +316 -0
- wisent/core/optuna/__init__.py +57 -0
- wisent/core/optuna/classifier/__init__.py +25 -0
- wisent/core/optuna/classifier/activation_generator.py +349 -0
- wisent/core/optuna/classifier/classifier_cache.py +509 -0
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
- wisent/core/optuna/steering/__init__.py +0 -0
- wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
- wisent/core/optuna/steering/data_utils.py +342 -0
- wisent/core/optuna/steering/metrics.py +474 -0
- wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
- wisent/core/optuna/steering/steering_optimization.py +1111 -0
- wisent/core/parser.py +1668 -0
- wisent/core/prompts/__init__.py +0 -0
- wisent/core/prompts/core/__init__.py +0 -0
- wisent/core/prompts/core/atom.py +57 -0
- wisent/core/prompts/core/prompt_formater.py +157 -0
- wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
- wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
- wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
- wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
- wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
- wisent/core/representation.py +5 -0
- wisent/core/sample_size_optimizer.py +648 -0
- wisent/core/sample_size_optimizer_v2.py +355 -0
- wisent/core/save_results.py +277 -0
- wisent/core/steering.py +652 -0
- wisent/core/steering_method.py +26 -0
- wisent/core/steering_methods/__init__.py +0 -0
- wisent/core/steering_methods/core/__init__.py +0 -0
- wisent/core/steering_methods/core/atoms.py +153 -0
- wisent/core/steering_methods/methods/__init__.py +0 -0
- wisent/core/steering_methods/methods/caa.py +44 -0
- wisent/core/steering_optimizer.py +1297 -0
- wisent/core/task_interface.py +132 -0
- wisent/core/task_selector.py +189 -0
- wisent/core/tasks/__init__.py +175 -0
- wisent/core/tasks/aime_task.py +141 -0
- wisent/core/tasks/file_task.py +211 -0
- wisent/core/tasks/hle_task.py +180 -0
- wisent/core/tasks/hmmt_task.py +119 -0
- wisent/core/tasks/livecodebench_task.py +201 -0
- wisent/core/tasks/livemathbench_task.py +158 -0
- wisent/core/tasks/lm_eval_task.py +455 -0
- wisent/core/tasks/math500_task.py +84 -0
- wisent/core/tasks/polymath_task.py +146 -0
- wisent/core/tasks/supergpqa_task.py +220 -0
- wisent/core/time_estimator.py +149 -0
- wisent/core/timing_calibration.py +174 -0
- wisent/core/tracking/__init__.py +54 -0
- wisent/core/tracking/latency.py +618 -0
- wisent/core/tracking/memory.py +359 -0
- wisent/core/trainers/__init__.py +0 -0
- wisent/core/trainers/core/__init__.py +11 -0
- wisent/core/trainers/core/atoms.py +45 -0
- wisent/core/trainers/steering_trainer.py +271 -0
- wisent/core/user_model_config.py +158 -0
- wisent/opti/__init__.py +0 -0
- wisent/opti/core/__init__.py +0 -0
- wisent/opti/core/atoms.py +175 -0
- wisent/opti/methods/__init__.py +0 -0
- wisent/opti/methods/opti_classificator.py +172 -0
- wisent/opti/methods/opti_steering.py +138 -0
- wisent/synthetic/__init__.py +0 -0
- wisent/synthetic/cleaners/__init__.py +0 -0
- wisent/synthetic/cleaners/core/__init__.py +0 -0
- wisent/synthetic/cleaners/core/atoms.py +58 -0
- wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
- wisent/synthetic/cleaners/methods/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
- wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
- wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
- wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
- wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
- wisent/synthetic/db_instructions/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/atoms.py +25 -0
- wisent/synthetic/db_instructions/mini_dp.py +37 -0
- wisent/synthetic/generators/__init__.py +0 -0
- wisent/synthetic/generators/core/__init__.py +0 -0
- wisent/synthetic/generators/core/atoms.py +73 -0
- wisent/synthetic/generators/diversities/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/core.py +68 -0
- wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
- wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
- wisent/synthetic/generators/pairs_generator.py +179 -0
- wisent-0.5.1.dist-info/METADATA +67 -0
- wisent-0.5.1.dist-info/RECORD +218 -0
- {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/WHEEL +1 -1
- {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info/licenses}/LICENSE +2 -2
- wisent/activations/__init__.py +0 -9
- wisent/activations/client.py +0 -97
- wisent/activations/extractor.py +0 -251
- wisent/activations/models.py +0 -95
- wisent/client.py +0 -45
- wisent/control_vector/__init__.py +0 -9
- wisent/control_vector/client.py +0 -85
- wisent/control_vector/manager.py +0 -168
- wisent/control_vector/models.py +0 -70
- wisent/inference/__init__.py +0 -9
- wisent/inference/client.py +0 -103
- wisent/inference/inferencer.py +0 -250
- wisent/inference/models.py +0 -66
- wisent/utils/__init__.py +0 -3
- wisent/utils/auth.py +0 -30
- wisent/utils/http.py +0 -228
- wisent/version.py +0 -3
- wisent-0.1.1.dist-info/METADATA +0 -142
- wisent-0.1.1.dist-info/RECORD +0 -23
- {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/top_level.txt +0 -0
wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py
@@ -0,0 +1,238 @@
from __future__ import annotations

from collections.abc import Iterable, Mapping
from typing import Any, Sequence, TYPE_CHECKING
from abc import ABC, abstractmethod


if TYPE_CHECKING:
    from wisent_guard.core.contrastive_pairs.core.pair import ContrastivePair
    from lm_eval.api.task import ConfigurableTask


__all__ = [
    "UnsupportedLMEvalBenchmarkError",
    "NoLabelledDocsAvailableError",
    "LMEvalBenchmarkExtractor",
]


class UnsupportedLMEvalBenchmarkError(Exception):
    """Raised when a benchmark/task does not have a compatible extractor."""


class NoLabelledDocsAvailableError(UnsupportedLMEvalBenchmarkError):
    """
    Raised when no labeled documents can be found for a given lm-eval task.

    This typically indicates the task does not expose any of:
    validation/test/training/fewshot docs, nor sufficient dataset metadata
    to load a split directly.
    """

class LMEvalBenchmarkExtractor(ABC):
    """
    Abstract base class for lm-eval benchmark-specific extractors.

    Subclasses should implement :meth:'extract_contrastive_pairs' to transform
    task documents into a list of :class:'ContrastivePair' instances.

    Utility methods are provided to load the most appropriate labeled documents
    from a task, with a clear order of preference and a robust dataset fallback.
    """

    @abstractmethod
    def extract_contrastive_pairs(
        self,
        lm_eval_task_data: ConfigurableTask,
        limit: int | None = None,
    ) -> list[ContrastivePair]:
        """
        Extract contrastive pairs from the provided lm-eval task.

        arguments:
            lm_eval_task_data:
                An lm-eval task instance.
            limit:
                Optional upper bound on the number of pairs to return.
                Values <= 0 are treated as "no limit".

        returns:
            A list of :class:'ContrastivePair'.
        """
        raise NotImplementedError


    @classmethod
    def load_docs(
        cls,
        lm_eval_task_data: ConfigurableTask,
        limit: int | None = None,
    ) -> list[dict[str, Any]]:
        """
        Load labeled documents from the most appropriate split with a clear
        preference order:

            validation → test → train → fewshot

        If none are available, attempts a dataset fallback using
        'datasets.load_dataset' with the task's declared metadata
        (e.g., 'dataset_path'/'dataset_name', 'dataset_config_name',
        and 'fewshot_split').

        arguments:
            lm_eval_task_data:
                Task object from lm-eval.
            limit:
                Optional maximum number of documents to return.
                Values <= 0 are treated as "no limit".

        returns:
            A list of document dictionaries.

        raises:
            NoLabelledDocsAvailableError:
                If no labeled documents are available.
            RuntimeError:
                If a dataset fallback is attempted and fails to load.
        """
        max_items = cls._normalize_limit(limit)

        preferred_sources: Sequence[tuple[str, str]] = (
            ("has_validation_docs", "validation_docs"),
            ("has_test_docs", "test_docs"),
            ("has_training_docs", "training_docs"),
            ("has_fewshot_docs", "fewshot_docs"),
        )

        for has_method, docs_method in preferred_sources:
            if cls._has_true(lm_eval_task_data, has_method) and cls._has_callable(
                lm_eval_task_data, docs_method
            ):
                docs_iter = getattr(lm_eval_task_data, docs_method)()
                docs_list = cls._coerce_docs_to_dicts(docs_iter, max_items)
                if docs_list:
                    return docs_list

        # Fallback to dataset split (common for tasks relying on fewshot_split).
        docs_list = cls._fallback_load_from_dataset(lm_eval_task_data, max_items)
        if docs_list:
            return docs_list

        task_name = getattr(lm_eval_task_data, "NAME", type(lm_eval_task_data).__name__)
        raise NoLabelledDocsAvailableError(
            f"No labeled documents are available for task '{task_name}'. "
            "The task does not expose validation/test/train/fewshot docs, "
            "and no usable dataset metadata was found for a fallback load.\n\n"
            "Tip: Ensure your task implements at least one of the doc getters "
            "(validation_docs/test_docs/training_docs/fewshot_docs), or that it "
            "declares dataset metadata (dataset_path or dataset_name, "
            "dataset_config_name, and fewshot_split) so a split can be loaded."
        )

    @staticmethod
    def _normalize_limit(limit: int | None) -> int | None:
        """
        Normalize limit semantics:
        - None → None (unbounded)
        - <= 0 → None (unbounded)
        - > 0 → limit
        """
        if limit is None or limit <= 0:
            return None
        return int(limit)

    @staticmethod
    def _has_callable(obj: Any, name: str) -> bool:
        """Return True if obj has a callable attribute with the given name."""
        return hasattr(obj, name) and callable(getattr(obj, name))

    @staticmethod
    def _has_true(obj: Any, name: str) -> bool:
        """Return True if obj has an attribute that evaluates to True when called or read."""
        attr = getattr(obj, name, None)
        try:
            return bool(attr() if callable(attr) else attr)
        except Exception:  # pragma: no cover (defensive)
            return False

    @classmethod
    def _coerce_docs_to_dicts(
        cls,
        docs_iter: Iterable[Any] | None,
        max_items: int | None,
    ) -> list[dict[str, Any]]:
        """
        Materialize an iterable of docs into a list of dictionaries,
        applying an optional limit.
        """
        if docs_iter is None:
            return []

        out: list[dict[str, Any]] = []
        for idx, item in enumerate(docs_iter):
            if max_items is not None and idx >= max_items:
                break
            if isinstance(item, Mapping):
                out.append(dict(item))
            else:
                try:
                    out.append(dict(item))
                except Exception as exc:
                    raise TypeError(
                        "Expected each document to be a mapping-like object that can "
                        "be converted to dict. Got type "
                        f"{type(item).__name__} with value {item!r}"
                    ) from exc
        return out

    @classmethod
    def _fallback_load_from_dataset(
        cls,
        lm_eval_task_data: ConfigurableTask,
        max_items: int | None,
    ) -> list[dict[str, Any]]:
        """
        Attempt to load documents via datasets.load_dataset using the task's
        declared metadata. We prefer 'fewshot_split' if present, since this is
        a common pattern for tasks like (M)MMLU.

        returns:
            A possibly empty list of docs.
        """
        dataset_name = getattr(lm_eval_task_data, "dataset_path", None) or getattr(
            lm_eval_task_data, "dataset_name", None
        )
        dataset_config = getattr(lm_eval_task_data, "dataset_config_name", None)
        dataset_split = getattr(lm_eval_task_data, "fewshot_split", None)

        if not dataset_name or not dataset_split:
            return []

        try:
            from datasets import load_dataset
        except Exception as exc:
            task_name = getattr(
                lm_eval_task_data, "NAME", type(lm_eval_task_data).__name__
            )
            raise RuntimeError(
                f"Task '{task_name}' specifies dataset metadata but "
                "the 'datasets' library is not available. "
                "Install it via 'pip install datasets' to enable fallback loading."
            ) from exc

        try:
            dataset = load_dataset(
                dataset_name,
                dataset_config if dataset_config else None,
                split=dataset_split,
            )
        except Exception as exc:
            task_name = getattr(lm_eval_task_data, "NAME", type(lm_eval_task_data).__name__)
            raise RuntimeError(
                f"Failed to load dataset split via fallback for task '{task_name}'. "
                f"Arguments were: name={dataset_name!r}, config={dataset_config!r}, "
                f"split={dataset_split!r}. Underlying error: {exc}"
            ) from exc

        return cls._coerce_docs_to_dicts(dataset, max_items)

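Note: the load_docs / _normalize_limit helpers above are what concrete extractors build on. A minimal sketch of such a subclass follows; the YesNoExtractor class and its "question"/"label" field names are hypothetical and not part of this release (the shipped reference implementation is the WinograndeExtractor further down in this diff).

from __future__ import annotations

from wisent_guard.core.contrastive_pairs.core.pair import ContrastivePair
from wisent_guard.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
from wisent_guard.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor


class YesNoExtractor(LMEvalBenchmarkExtractor):
    """Hypothetical extractor for a task whose docs carry a question and a boolean label."""

    def extract_contrastive_pairs(self, lm_eval_task_data, limit: int | None = None) -> list[ContrastivePair]:
        max_items = self._normalize_limit(limit)             # None or <= 0 means "no limit"
        docs = self.load_docs(lm_eval_task_data, max_items)  # validation -> test -> train -> fewshot
        pairs: list[ContrastivePair] = []
        for doc in docs:
            question = str(doc.get("question", "")).strip()
            if not question:
                continue
            correct, incorrect = ("yes", "no") if doc.get("label") else ("no", "yes")
            pairs.append(
                ContrastivePair(
                    prompt=question,
                    positive_response=PositiveResponse(model_response=correct),
                    negative_response=NegativeResponse(model_response=incorrect),
                    label="yes_no_sketch",
                )
            )
        return pairs
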
wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py
@@ -0,0 +1,8 @@
__all__ = [
    "EXTRACTORS",
]
base_import: str = "wisent_guard.core.contrastive_pairs.lm_eval_pairs.lm_task_extractors."
EXTRACTORS: dict[str, str] = {
    # key → "module_path:ClassName" (supports dotted attr path after ':')
    "winogrande": f"{base_import}winogrande:WinograndeExtractor",
}

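The manifest maps an lm-eval task key to a lazily imported "module:Class" reference. A hypothetical second entry would look like the sketch below; "boolq" and BoolQExtractor are assumptions for illustration, and only the winogrande entry ships in this release.

EXTRACTORS: dict[str, str] = {
    "winogrande": f"{base_import}winogrande:WinograndeExtractor",
    # hypothetical addition, assuming an lm_task_extractors/boolq.py module existed:
    "boolq": f"{base_import}boolq:BoolQExtractor",
}
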
wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py
@@ -0,0 +1,132 @@
from __future__ import annotations

from typing import Type, Union
import importlib
import logging

from wisent_guard.core.contrastive_pairs.lm_eval_pairs.atoms import (
    LMEvalBenchmarkExtractor,
    UnsupportedLMEvalBenchmarkError,
)

from wisent_guard.core.contrastive_pairs.lm_eval_pairs.lm_extractor_manifest import EXTRACTORS as _MANIFEST

__all__ = [
    "register_extractor",
    "get_extractor",
]

LOG = logging.getLogger(__name__)

_REGISTRY: dict[str, Union[str, Type[LMEvalBenchmarkExtractor]]] = dict(_MANIFEST)


def register_extractor(name: str, ref: Union[str, Type[LMEvalBenchmarkExtractor]]) -> None:
    """
    Register a new extractor by name.
    arguments:
        name:
            Name/key for the extractor (case-insensitive).
        ref:
            Either a string "module_path:ClassName[.Inner]" or a subclass of
            LMEvalBenchmarkExtractor.
    raises:
        ValueError:
            If the name is empty or the string ref is malformed.
        TypeError:
            If the ref class does not subclass LMEvalBenchmarkExtractor.

    example:
        >>> from wisent_guard.core.contrastive_pairs.lm_eval_pairs.lm_extractor_registry import register_extractor
        >>> from wisent_guard.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor
        >>> class MyExtractor(LMEvalBenchmarkExtractor): ...
        >>> register_extractor("mytask", MyExtractor)
        >>> register_extractor("mytask2", "my_module:MyExtractor")
    """
    key = (name or "").strip().lower()
    if not key:
        raise ValueError("Extractor name/key must be a non-empty string.")

    if isinstance(ref, str):
        if ":" not in ref:
            raise ValueError("String ref must be 'module_path:ClassName[.Inner]'.")
        _REGISTRY[key] = ref
        return

    if not issubclass(ref, LMEvalBenchmarkExtractor):
        raise TypeError(f"{getattr(ref, '__name__', ref)!r} must subclass LMEvalBenchmarkExtractor")

    _REGISTRY[key] = ref


def get_extractor(task_name: str) -> LMEvalBenchmarkExtractor:
    """
    Retrieve a registered extractor by task name.

    arguments:
        task_name:
            Name of the lm-eval benchmark/task (e.g., "winogrande").
            Case-insensitive. Exact match only.

    returns:
        An instance of the corresponding LMEvalBenchmarkExtractor subclass.

    raises:
        UnsupportedLMEvalBenchmarkError:
            If no extractor is registered for the given task name.
        ImportError:
            If the extractor class cannot be imported/resolved.
        TypeError:
            If the resolved class does not subclass LMEvalBenchmarkExtractor.
    """

    key = (task_name or "").strip().lower()
    if not key:
        raise UnsupportedLMEvalBenchmarkError("Empty task name is not supported.")

    ref = _REGISTRY.get(key)
    if ref:
        return _instantiate(ref)

    raise UnsupportedLMEvalBenchmarkError(
        f"No extractor registered for task '{task_name}'. "
        f"Known: {', '.join(sorted(_REGISTRY)) or '(none)'}"
    )

def _instantiate(ref: Union[str, Type[LMEvalBenchmarkExtractor]]) -> LMEvalBenchmarkExtractor:
    """
    Instantiate an extractor from a string reference or class.

    arguments:
        ref:
            Either a string "module_path:ClassName[.Inner]" or a subclass of
            LMEvalBenchmarkExtractor.

    returns:
        An instance of the corresponding LMEvalBenchmarkExtractor subclass.

    raises:
        ImportError:
            If the extractor class cannot be imported/resolved.
        TypeError:
            If the resolved class does not subclass LMEvalBenchmarkExtractor.
    """
    if not isinstance(ref, str):
        return ref()

    module_path, attr_path = ref.split(":", 1)
    try:
        mod = importlib.import_module(module_path)
    except Exception as exc:
        raise ImportError(f"Cannot import module '{module_path}' for extractor '{ref}'.") from exc

    obj = mod
    for part in attr_path.split("."):
        try:
            obj = getattr(obj, part)
        except AttributeError as exc:
            raise ImportError(f"Extractor class '{attr_path}' not found in '{module_path}'.") from exc

    if not isinstance(obj, type) or not issubclass(obj, LMEvalBenchmarkExtractor):
        raise TypeError(f"Resolved object '{obj}' is not a LMEvalBenchmarkExtractor subclass.")
    return obj()

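A short usage sketch for this registry, assuming the import paths above; the "my_pkg.extractors:MyTaskExtractor" reference is hypothetical, and only "winogrande" is registered out of the box.

from wisent_guard.core.contrastive_pairs.lm_eval_pairs.atoms import UnsupportedLMEvalBenchmarkError
from wisent_guard.core.contrastive_pairs.lm_eval_pairs.lm_extractor_registry import (
    get_extractor,
    register_extractor,
)

extractor = get_extractor("winogrande")  # resolves the manifest string lazily and instantiates it

# Register an additional extractor, either as a class or as a lazy "module:Class" string.
register_extractor("mytask", "my_pkg.extractors:MyTaskExtractor")

try:
    get_extractor("unknown_task")
except UnsupportedLMEvalBenchmarkError as err:
    print(err)  # the message lists the currently known keys
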
wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py: File without changes
wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py
@@ -0,0 +1,115 @@
from __future__ import annotations

from typing import Any, TYPE_CHECKING

from wisent_guard.core.contrastive_pairs.core.pair import ContrastivePair
from wisent_guard.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
from wisent_guard.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor
from wisent_guard.cli.cli_logger import setup_logger, bind

if TYPE_CHECKING:
    from lm_eval.api.task import ConfigurableTask


__all__ = ["WinograndeExtractor"]
_LOG = setup_logger(__name__)


class WinograndeExtractor(LMEvalBenchmarkExtractor):
    """Extractor for the Winogrande benchmark."""

    def extract_contrastive_pairs(
        self,
        lm_eval_task_data: ConfigurableTask,
        limit: int | None = None,
    ) -> list[ContrastivePair]:
        """
        Build contrastive pairs from Winogrande docs.

        Winogrande schema:
            - sentence: str (contains a blank)
            - option1, option2: str
            - answer: "1" or "2" (sometimes int-like)

        Args:
            lm_eval_task_data: lm-eval task instance for Winogrande.
            limit: Optional maximum number of pairs to produce.

        Returns:
            A list of ContrastivePair objects.
        """
        log = bind(_LOG, task=getattr(lm_eval_task_data, "NAME", "unknown"))

        max_items = self._normalize_limit(limit)
        docs = self.load_docs(lm_eval_task_data, max_items)

        pairs: list[ContrastivePair] = []

        log.info("Extracting contrastive pairs", extra={"doc_count": len(docs)})

        for doc in docs:
            pair = self._extract_pair_from_doc(doc)
            if pair is not None:
                pairs.append(pair)
            if max_items is not None and len(pairs) >= max_items:
                break

        if not pairs:
            task_name = getattr(lm_eval_task_data, "NAME", type(lm_eval_task_data).__name__)
            log.warning("No valid Winogrande pairs extracted", extra={"task": task_name})

        return pairs

    def _extract_pair_from_doc(self, doc: dict[str, Any]) -> ContrastivePair | None:
        """
        Convert a single Winogrande doc into a ContrastivePair, if possible.
        Returns None when required fields are missing or malformed.
        """
        log = bind(_LOG, doc_id=doc.get("id", "unknown"))

        try:
            sentence = str(doc.get("sentence", "")).strip()
            option1 = str(doc.get("option1", "")).strip()
            option2 = str(doc.get("option2", "")).strip()

            raw_answer = doc.get("answer", "")
            answer = str(raw_answer).strip()

            if not sentence or not option1 or not option2 or answer not in {"1", "2"}:
                log.debug(
                    "Skipping doc due to missing/invalid fields",
                    extra={"doc": doc},
                )
                return None

            question = f"Complete the sentence: {sentence}"
            formatted_question = f"{question}\nA. {option1}\nB. {option2}"

            correct = option1 if answer == "1" else option2
            incorrect = option2 if answer == "1" else option1

            metadata = {
                "label": "winogrande",
            }

            return self._build_pair(
                question=formatted_question,
                correct=correct,
                incorrect=incorrect,
                metadata=metadata,
            )

        except Exception as exc:
            log.error("Error extracting pair from doc", exc_info=exc, extra={"doc": doc})
            return None

    @staticmethod
    def _build_pair(
        question: str,
        correct: str,
        incorrect: str,
        metadata: dict[str, Any] | None = None,
    ) -> ContrastivePair:
        positive_response = PositiveResponse(model_response=correct)
        negative_response = NegativeResponse(model_response=incorrect)
        return ContrastivePair(prompt=question, positive_response=positive_response, negative_response=negative_response, label=metadata.get("label"))

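For reference, this is what the extractor produces from a single Winogrande-style doc; the doc values below are made up for illustration and the direct call to the internal _extract_pair_from_doc helper is only for demonstration.

from wisent_guard.core.contrastive_pairs.lm_eval_pairs.lm_task_extractors.winogrande import WinograndeExtractor

doc = {
    "sentence": "The trophy doesn't fit into the brown suitcase because _ is too small.",
    "option1": "the trophy",
    "option2": "the suitcase",
    "answer": "2",
}

pair = WinograndeExtractor()._extract_pair_from_doc(doc)
# pair.prompt:
#   Complete the sentence: The trophy doesn't fit into the brown suitcase because _ is too small.
#   A. the trophy
#   B. the suitcase
# answer "2" selects option2, so:
#   pair.positive_response.model_response == "the suitcase"
#   pair.negative_response.model_response == "the trophy"
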
wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py
@@ -0,0 +1,50 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from wisent_guard.core.contrastive_pairs.lm_eval_pairs.lm_extractor_registry import get_extractor
from wisent_guard.cli.cli_logger import setup_logger, bind

if TYPE_CHECKING:
    from lm_eval.api.task import ConfigurableTask
    from wisent_guard.core.contrastive_pairs.core.pair import ContrastivePair

__all__ = ["build_contrastive_pairs"]
_LOG = setup_logger(__name__)


def lm_build_contrastive_pairs(
    task_name: str,
    lm_eval_task: ConfigurableTask,
    limit: int | None = None,
) -> list[ContrastivePair]:
    """
    Resolve the task's extractor (lazy-loaded) and return contrastive pairs.

    arguments:
        task_name:
            Name of the lm-eval benchmark/task (e.g., "winogrande").
        lm_eval_task:
            An lm-eval task instance.
        limit:
            Optional upper bound on the number of pairs to return.
            Values <= 0 are treated as "no limit".

    returns:
        A list of ContrastivePair objects.
    """
    log = bind(_LOG, task=task_name or "unknown")
    log.info("Building contrastive pairs", extra={"limit": limit})

    # 1) Get extractor instance by name (exact or longest-prefix)
    extractor = get_extractor(task_name)

    log.info("Using extractor", extra={"extractor": extractor.__class__.__name__})

    # 2) Normalize limit (<=0 → None)
    max_items = None if (limit is None or limit <= 0) else int(limit)

    log.info("Extracting contrastive pairs", extra={"max_items": max_items})

    # 3) Delegate: extractor loads docs and builds pairs
    return extractor.extract_contrastive_pairs(lm_eval_task, limit=max_items)

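A usage sketch that wires this up to lm-eval, assuming the lm-eval harness is installed; get_task_dict and TaskManager are the harness's standard task-loading helpers, and their exact signatures vary slightly between versions.

from lm_eval.tasks import TaskManager, get_task_dict

from wisent_guard.core.contrastive_pairs.lm_eval_pairs.lm_task_pairs_generation import (
    lm_build_contrastive_pairs,
)

task_dict = get_task_dict(["winogrande"], TaskManager())
pairs = lm_build_contrastive_pairs("winogrande", task_dict["winogrande"], limit=100)
print(len(pairs))
if pairs:
    print(pairs[0].prompt)
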
wisent/core/data_loaders/__init__.py: File without changes
wisent/core/data_loaders/core/__init__.py: File without changes
wisent/core/data_loaders/core/atoms.py
@@ -0,0 +1,98 @@
from __future__ import annotations

from abc import ABC, abstractmethod
import inspect
from typing import Any, Dict, Type

from typing import TypedDict, Mapping
from lm_eval.api.task import ConfigurableTask
from wisent_guard.core.contrastive_pairs.core.set import ContrastivePairSet

__all__ = ["DataLoaderError", "BaseDataLoader"]

class LoadDataResult(TypedDict):
    """
    Structured output from a data loader used for training and evaluation.

    attributes:
        train_qa_pairs:
            The training set of question-answer pairs.
        test_qa_pairs:
            The test set of question-answer pairs.
        task_type:
            The high-level task category (e.g., "classification").
        lm_task_data:
            Tasks in the 'lm_eval' repository format, if applicable.

            When training/evaluating steering vectors with 'lm_eval', that
            library is responsible for downloading and preprocessing the data,
            and it provides the evaluation function that compares the steered
            model to the baseline, see: https://github.com/EleutherAI/lm-evaluation-harness.
            For custom data loaders, this is 'None'.
    """
    train_qa_pairs: ContrastivePairSet
    test_qa_pairs: ContrastivePairSet
    task_type: str
    lm_task_data: Mapping[str, ConfigurableTask] | ConfigurableTask | None


class DataLoaderError(RuntimeError):
    """Raised when a data loader cannot complete loading."""

class BaseDataLoader(ABC):
    """Abstract data loader base. Concrete subclasses auto-register on import."""
    name: str = "base"
    description: str = "Abstract data loader"

    _REGISTRY: Dict[str, Type["BaseDataLoader"]] = {}

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        if cls is BaseDataLoader:
            return
        if inspect.isabstract(cls):
            return
        if not getattr(cls, "name", None):
            raise TypeError("DataLoader subclasses must define a class attribute `name`.")
        if cls.name in BaseDataLoader._REGISTRY:
            raise ValueError(f"Duplicate data loader name: {cls.name!r}")
        BaseDataLoader._REGISTRY[cls.name] = cls

    def __init__(self, **kwargs: Any) -> None:
        self.kwargs: dict[str, Any] = dict(kwargs)

    @staticmethod
    def _effective_split(split_ratio: float | None) -> float:
        """
        Determine the effective split ratio, defaulting to 0.8 if None.

        arguments:
            split_ratio: Optional float in [0.0, 1.0] or None.

        returns:
            A float in [0.0, 1.0] representing the training split ratio.

        raises:
            ValueError if split_ratio is not in [0.0, 1.0].
        """
        if split_ratio is None:
            return 0.8
        if not (0.0 <= split_ratio <= 1.0):
            raise ValueError("split_ratio must be in [0.0, 1.0]")
        return float(split_ratio)

    @abstractmethod
    def load(self, **kwargs: Any) -> LoadDataResult:
        """Return a LoadDataResult (train_qa_pairs, test_qa_pairs, task_type, lm_task_data)."""
        raise NotImplementedError

    @classmethod
    def list_registered(cls) -> dict[str, Type["BaseDataLoader"]]:
        return dict(cls._REGISTRY)

    @classmethod
    def get(cls, name: str) -> Type["BaseDataLoader"]:
        try:
            return cls._REGISTRY[name]
        except KeyError as exc:
            raise DataLoaderError(f"Unknown data loader: {name!r}") from exc

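A minimal sketch of a concrete loader built on this base class; InMemoryLoader and its constructor arguments are hypothetical, and registration happens automatically through __init_subclass__ as soon as the subclass is defined.

from typing import Any

from wisent_guard.core.contrastive_pairs.core.set import ContrastivePairSet
from wisent_guard.core.data_loaders.core.atoms import BaseDataLoader, LoadDataResult


class InMemoryLoader(BaseDataLoader):
    """Serves contrastive pair sets that were built ahead of time."""

    name = "in_memory"  # key used by BaseDataLoader.get("in_memory")
    description = "Returns pre-built train/test ContrastivePairSet objects."

    def __init__(self, train_pairs: ContrastivePairSet, test_pairs: ContrastivePairSet, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        self._train = train_pairs
        self._test = test_pairs

    def load(self, **kwargs: Any) -> LoadDataResult:
        return {
            "train_qa_pairs": self._train,
            "test_qa_pairs": self._test,
            "task_type": "classification",
            "lm_task_data": None,  # no lm-eval task backs this loader
        }


# Defining the subclass is enough to register it:
assert BaseDataLoader.get("in_memory") is InMemoryLoader
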
wisent/core/data_loaders/loaders/__init__.py: File without changes