PyPI - wisent - Versions diffs - 0.1.1__py3-none-any.whl → 0.5.2__py3-none-any.whl - Mend

wisent-0.1.1-py3-none-any.whl → wisent-0.5.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wisent might be problematic. Click here for more details.

Files changed (237) hide show

wisent/__init__.py +1 -8
wisent/benchmarks/__init__.py +0 -0
wisent/benchmarks/coding/__init__.py +0 -0
wisent/benchmarks/coding/metrics/__init__.py +0 -0
wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
wisent/benchmarks/coding/metrics/evaluator.py +275 -0
wisent/benchmarks/coding/metrics/passk.py +66 -0
wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
wisent/benchmarks/coding/providers/__init__.py +18 -0
wisent/benchmarks/coding/providers/core/__init__.py +0 -0
wisent/benchmarks/coding/providers/core/atoms.py +31 -0
wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
wisent/classifiers/__init__.py +0 -0
wisent/classifiers/core/__init__.py +0 -0
wisent/classifiers/core/atoms.py +747 -0
wisent/classifiers/models/__init__.py +0 -0
wisent/classifiers/models/logistic.py +29 -0
wisent/classifiers/models/mlp.py +47 -0
wisent/cli/__init__.py +0 -0
wisent/cli/classifiers/__init__.py +0 -0
wisent/cli/classifiers/classifier_rotator.py +137 -0
wisent/cli/cli_logger.py +142 -0
wisent/cli/data_loaders/__init__.py +0 -0
wisent/cli/data_loaders/data_loader_rotator.py +96 -0
wisent/cli/evaluators/__init__.py +0 -0
wisent/cli/evaluators/evaluator_rotator.py +148 -0
wisent/cli/steering_methods/__init__.py +0 -0
wisent/cli/steering_methods/steering_rotator.py +110 -0
wisent/cli/wisent_cli/__init__.py +0 -0
wisent/cli/wisent_cli/commands/__init__.py +0 -0
wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
wisent/cli/wisent_cli/commands/listing.py +154 -0
wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
wisent/cli/wisent_cli/main.py +93 -0
wisent/cli/wisent_cli/shell.py +80 -0
wisent/cli/wisent_cli/ui.py +69 -0
wisent/cli/wisent_cli/util/__init__.py +0 -0
wisent/cli/wisent_cli/util/aggregations.py +43 -0
wisent/cli/wisent_cli/util/parsing.py +126 -0
wisent/cli/wisent_cli/version.py +4 -0
wisent/core/__init__.py +27 -0
wisent/core/activations/__init__.py +0 -0
wisent/core/activations/activations_collector.py +338 -0
wisent/core/activations/core/__init__.py +0 -0
wisent/core/activations/core/atoms.py +216 -0
wisent/core/agent/__init__.py +18 -0
wisent/core/agent/budget.py +638 -0
wisent/core/agent/device_benchmarks.py +685 -0
wisent/core/agent/diagnose/__init__.py +55 -0
wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
wisent/core/agent/diagnose/create_classifier.py +1154 -0
wisent/core/agent/diagnose/response_diagnostics.py +268 -0
wisent/core/agent/diagnose/select_classifiers.py +506 -0
wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
wisent/core/agent/diagnose/tasks/__init__.py +33 -0
wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
wisent/core/agent/diagnose.py +242 -0
wisent/core/agent/steer.py +212 -0
wisent/core/agent/timeout.py +134 -0
wisent/core/autonomous_agent.py +1234 -0
wisent/core/bigcode_integration.py +583 -0
wisent/core/contrastive_pairs/__init__.py +15 -0
wisent/core/contrastive_pairs/core/__init__.py +0 -0
wisent/core/contrastive_pairs/core/atoms.py +45 -0
wisent/core/contrastive_pairs/core/buliders.py +59 -0
wisent/core/contrastive_pairs/core/pair.py +178 -0
wisent/core/contrastive_pairs/core/response.py +152 -0
wisent/core/contrastive_pairs/core/serialization.py +300 -0
wisent/core/contrastive_pairs/core/set.py +133 -0
wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
wisent/core/data_loaders/__init__.py +0 -0
wisent/core/data_loaders/core/__init__.py +0 -0
wisent/core/data_loaders/core/atoms.py +98 -0
wisent/core/data_loaders/loaders/__init__.py +0 -0
wisent/core/data_loaders/loaders/custom.py +120 -0
wisent/core/data_loaders/loaders/lm_loader.py +218 -0
wisent/core/detection_handling.py +257 -0
wisent/core/download_full_benchmarks.py +1386 -0
wisent/core/evaluators/__init__.py +0 -0
wisent/core/evaluators/oracles/__init__.py +0 -0
wisent/core/evaluators/oracles/interactive.py +73 -0
wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
wisent/core/evaluators/oracles/user_specified.py +67 -0
wisent/core/hyperparameter_optimizer.py +429 -0
wisent/core/lm_eval_harness_ground_truth.py +1396 -0
wisent/core/log_likelihoods_evaluator.py +321 -0
wisent/core/managed_cached_benchmarks.py +595 -0
wisent/core/mixed_benchmark_sampler.py +364 -0
wisent/core/model_config_manager.py +330 -0
wisent/core/model_persistence.py +317 -0
wisent/core/models/__init__.py +0 -0
wisent/core/models/core/__init__.py +0 -0
wisent/core/models/core/atoms.py +460 -0
wisent/core/models/wisent_model.py +727 -0
wisent/core/multi_steering.py +316 -0
wisent/core/optuna/__init__.py +57 -0
wisent/core/optuna/classifier/__init__.py +25 -0
wisent/core/optuna/classifier/activation_generator.py +349 -0
wisent/core/optuna/classifier/classifier_cache.py +509 -0
wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
wisent/core/optuna/steering/__init__.py +0 -0
wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
wisent/core/optuna/steering/data_utils.py +342 -0
wisent/core/optuna/steering/metrics.py +474 -0
wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
wisent/core/optuna/steering/steering_optimization.py +1111 -0
wisent/core/parser.py +1668 -0
wisent/core/prompts/__init__.py +0 -0
wisent/core/prompts/core/__init__.py +0 -0
wisent/core/prompts/core/atom.py +57 -0
wisent/core/prompts/core/prompt_formater.py +157 -0
wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
wisent/core/representation.py +5 -0
wisent/core/sample_size_optimizer.py +648 -0
wisent/core/sample_size_optimizer_v2.py +355 -0
wisent/core/save_results.py +277 -0
wisent/core/steering.py +652 -0
wisent/core/steering_method.py +26 -0
wisent/core/steering_methods/__init__.py +0 -0
wisent/core/steering_methods/core/__init__.py +0 -0
wisent/core/steering_methods/core/atoms.py +153 -0
wisent/core/steering_methods/methods/__init__.py +0 -0
wisent/core/steering_methods/methods/caa.py +44 -0
wisent/core/steering_optimizer.py +1297 -0
wisent/core/task_interface.py +132 -0
wisent/core/task_selector.py +189 -0
wisent/core/tasks/__init__.py +175 -0
wisent/core/tasks/aime_task.py +141 -0
wisent/core/tasks/file_task.py +211 -0
wisent/core/tasks/hle_task.py +180 -0
wisent/core/tasks/hmmt_task.py +119 -0
wisent/core/tasks/livecodebench_task.py +201 -0
wisent/core/tasks/livemathbench_task.py +158 -0
wisent/core/tasks/lm_eval_task.py +455 -0
wisent/core/tasks/math500_task.py +84 -0
wisent/core/tasks/polymath_task.py +146 -0
wisent/core/tasks/supergpqa_task.py +220 -0
wisent/core/time_estimator.py +149 -0
wisent/core/timing_calibration.py +174 -0
wisent/core/tracking/__init__.py +54 -0
wisent/core/tracking/latency.py +618 -0
wisent/core/tracking/memory.py +359 -0
wisent/core/trainers/__init__.py +0 -0
wisent/core/trainers/core/__init__.py +11 -0
wisent/core/trainers/core/atoms.py +45 -0
wisent/core/trainers/steering_trainer.py +271 -0
wisent/core/user_model_config.py +158 -0
wisent/opti/__init__.py +0 -0
wisent/opti/core/__init__.py +0 -0
wisent/opti/core/atoms.py +175 -0
wisent/opti/methods/__init__.py +0 -0
wisent/opti/methods/opti_classificator.py +172 -0
wisent/opti/methods/opti_steering.py +138 -0
wisent/synthetic/__init__.py +0 -0
wisent/synthetic/cleaners/__init__.py +0 -0
wisent/synthetic/cleaners/core/__init__.py +0 -0
wisent/synthetic/cleaners/core/atoms.py +58 -0
wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
wisent/synthetic/cleaners/methods/__init__.py +0 -0
wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
wisent/synthetic/db_instructions/__init__.py +0 -0
wisent/synthetic/db_instructions/core/__init__.py +0 -0
wisent/synthetic/db_instructions/core/atoms.py +25 -0
wisent/synthetic/db_instructions/mini_dp.py +37 -0
wisent/synthetic/generators/__init__.py +0 -0
wisent/synthetic/generators/core/__init__.py +0 -0
wisent/synthetic/generators/core/atoms.py +73 -0
wisent/synthetic/generators/diversities/__init__.py +0 -0
wisent/synthetic/generators/diversities/core/__init__.py +0 -0
wisent/synthetic/generators/diversities/core/core.py +68 -0
wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
wisent/synthetic/generators/pairs_generator.py +179 -0
wisent-0.5.2.dist-info/METADATA +67 -0
wisent-0.5.2.dist-info/RECORD +218 -0
{wisent-0.1.1.dist-info → wisent-0.5.2.dist-info}/WHEEL +1 -1
{wisent-0.1.1.dist-info → wisent-0.5.2.dist-info/licenses}/LICENSE +2 -2
wisent/activations/__init__.py +0 -9
wisent/activations/client.py +0 -97
wisent/activations/extractor.py +0 -251
wisent/activations/models.py +0 -95
wisent/client.py +0 -45
wisent/control_vector/__init__.py +0 -9
wisent/control_vector/client.py +0 -85
wisent/control_vector/manager.py +0 -168
wisent/control_vector/models.py +0 -70
wisent/inference/__init__.py +0 -9
wisent/inference/client.py +0 -103
wisent/inference/inferencer.py +0 -250
wisent/inference/models.py +0 -66
wisent/utils/__init__.py +0 -3
wisent/utils/auth.py +0 -30
wisent/utils/http.py +0 -228
wisent/version.py +0 -3
wisent-0.1.1.dist-info/METADATA +0 -142
wisent-0.1.1.dist-info/RECORD +0 -23
{wisent-0.1.1.dist-info → wisent-0.5.2.dist-info}/top_level.txt +0 -0

wisent/benchmarks/coding/output_sanitizer/utils.py ADDED Viewed

@@ -0,0 +1,107 @@
+# coding/output_sanitizer/utils.py
+from __future__ import annotations
+import re
+from textwrap import dedent
+# Matches one Markdown fenced block: group "lang" is the (possibly empty) tag
+# right after the opening ```, group "code" is everything up to the closing
+# ``` -- or up to the end of the text when the fence is never closed.
+_FENCE_RE = re.compile(
+    r"```(?P<lang>[a-zA-Z0-9_+-]*)\s*\n(?P<code>.*?)(?:```|$)", re.DOTALL
+)
+def extract_code_block(raw: str, prefer_langs=("python","py","cpp","c++","java")) -> str:
+    """
+    Pick the most promising fenced code block out of ``raw``.
+    A block whose language tag is listed in ``prefer_langs`` beats any block
+    that is not; among equally preferred blocks the longest body wins (the
+    earliest one on a tie). When ``raw`` has no fenced block at all, the text
+    is returned trimmed, with wrapping triple quotes removed.
+    Args:
+        raw:
+            The raw text possibly containing fenced code blocks.
+        prefer_langs:
+            Lower-case language tags to prefer when selecting a code block.
+    Returns:
+        The body of the selected block with surrounding whitespace stripped,
+        or the cleaned-up raw text if no fenced block is found.
+    Examples:
+        >>> extract_code_block("Here is some code:\\n```python\\ndef foo(): pass\\n```")
+        'def foo(): pass'
+        >>> extract_code_block("No code blocks here.")
+        'No code blocks here.'
+        >>> extract_code_block("Multiple:\\n```java\\nclass A {}\\n```\\n```python\\ndef f(): pass\\n```")
+        'def f(): pass'
+    """
+    fenced = list(_FENCE_RE.finditer(raw))
+    if len(fenced) == 0:
+        return strip_triple_quotes(raw)
+    def rank(block):
+        # Sort key: preferred language first, then body length.
+        tag = (block.group("lang") or "").lower()
+        preferred = 1 if tag in prefer_langs else 0
+        return (preferred, len(block.group("code")))
+    best = max(fenced, key=rank)
+    return best.group("code").strip()
+def strip_triple_quotes(s: str) -> str:
+    """
+    Remove one pair of wrapping triple quotes, if present.
+    The input is trimmed first; if what remains both starts and ends with
+    the same triple-quote delimiter, the delimiters are dropped and the inner
+    text is trimmed again.
+    Args:
+        s:
+            The input string.
+    Returns:
+        The trimmed string, without its wrapping triple quotes if it had any.
+    Examples:
+        >>> strip_triple_quotes('\"\"\"def foo(): pass\"\"\"')
+        'def foo(): pass'
+        >>> strip_triple_quotes("'''def foo(): pass'''")
+        'def foo(): pass'
+        >>> strip_triple_quotes('def foo(): pass')
+        'def foo(): pass'
+    """
+    text = s.strip()
+    # Double-quote delimiter is checked first, then the single-quote one.
+    for delimiter in ('"""', "'''"):
+        if text.startswith(delimiter) and text.endswith(delimiter):
+            return text[3:-3].strip()
+    return text
+def normalize_whitespace(code: str) -> str:
+    """
+    Convert line endings to LF, remove common indentation, and trim the ends.
+    arguments:
+        code:
+            The input code string.
+    returns:
+        The normalized code string. Only the indentation shared by all
+        non-blank lines is removed, so relative indentation is preserved.
+    examples:
+        >>> normalize_whitespace("  def foo():\\n    pass  ")
+        'def foo():\\n  pass'
+        >>> normalize_whitespace("def foo():\\r\\n    pass\\r")
+        'def foo():\\n    pass'
+    """
+    # CRLF must be handled before lone CR so that "\r\n" does not become two newlines.
+    unix_eol = code.replace("\r\n", "\n").replace("\r", "\n")
+    return dedent(unix_eol).strip()
+def maybe_black(code: str) -> str:
+    """
+    Best-effort formatting with Black.
+    arguments:
+        code:
+            The input Python code string.
+    returns:
+        Black's output when Black can be imported and formats the code
+        without raising; otherwise the original code, unchanged.
+    examples:
+        >>> maybe_black("def foo():pass")
+        'def foo():\\n    pass\\n'
+    """
+    try:
+        import black
+        formatted = black.format_str(code, mode=black.FileMode())
+    except Exception:
+        # Deliberately broad: a missing Black install and code Black rejects
+        # are both answered by handing the input back untouched.
+        formatted = code
+    return formatted

wisent/benchmarks/coding/providers/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+# coding/providers/__init__.py
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Dict, Iterable, Protocol, Literal
+Language = Literal["python", "cpp", "java"]
+# NOTE(review): providers/core/atoms.py declares a CodingTask with the same
+# fields; confirm whether this package-level copy is meant to be a re-export.
+@dataclass(frozen=True)
+class CodingTask:
+    """A normalized task with language + harness files to be executed.
+    Frozen dataclass: fields cannot be reassigned after construction (the
+    dicts they hold are still ordinary mutable dicts).
+    """
+    language: Language
+    files: Dict[str, str]          # e.g., {"solution.py": "...", "tests.py": "..."} or C++/Java equivalents
+    options: Dict[str, object]     # e.g., {"cxx_std": "c++20", "java_main": "MainTest"}
+class Provider(Protocol):
+    """Dataset provider yields tasks (codegen or self-repair compatible).
+    Structural interface: any object with a ``name`` attribute and a matching
+    ``iter_tasks`` method satisfies it; no inheritance is required.
+    """
+    name: str
+    # Returns the tasks of the requested split (defaults to "test").
+    def iter_tasks(self, split: str = "test") -> Iterable[CodingTask]: ...

wisent/benchmarks/coding/providers/core/__init__.py ADDED Viewed

File without changes

wisent/benchmarks/coding/providers/core/atoms.py ADDED Viewed

@@ -0,0 +1,31 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Iterable, Protocol, Literal
+Language = Literal["python", "cpp", "java"]
+@dataclass(frozen=True)
+class CodingTask:
+    """
+    A normalized task with language + harness files to be executed.
+    Frozen dataclass: fields cannot be reassigned after construction (the
+    dicts they hold are still ordinary mutable dicts).
+    attributes:
+        language:
+            The programming language of the task; one of the ``Language``
+            literals ("python", "cpp", "java").
+        files:
+            A dictionary mapping filenames to their content. For example,
+            {"solution.py": "...", "tests.py": "..."} for Python tasks,
+            or equivalent files for C++/Java tasks.
+        options:
+            A dictionary of additional options that may be required for
+            execution. For example, {"cxx_std": "c++20"} for C++ tasks,
+            or {"java_main": "MainTest"} for Java tasks.
+    """
+    language: Language
+    files: dict[str, str]
+    options: dict[str, object]
+class Provider(Protocol):
+    """Dataset provider yields tasks (codegen or self-repair compatible).
+    Structural interface: any object with a ``name`` attribute and a matching
+    ``iter_tasks`` method satisfies it; no inheritance is required.
+    """
+    name: str
+    # Returns the tasks of the requested split (defaults to "test").
+    def iter_tasks(self, split: str = "test") -> Iterable[CodingTask]: ...

wisent/benchmarks/coding/providers/livecodebench/__init__.py ADDED Viewed

File without changes

wisent/benchmarks/coding/providers/livecodebench/provider.py ADDED Viewed

@@ -0,0 +1,53 @@
+# coding/providers/livecodebench/provider.py
+from __future__ import annotations
+from typing import Iterable
+from ..core.atoms import CodingTask, Language
+class LiveCodeBenchProvider:
+    """
+    Stub adapter for LiveCodeBench (code_generation_lite).
+    The dataset is not loaded yet: ``iter_tasks`` yields one hard-coded toy
+    task for the configured language so that examples can run end to end.
+    Note: HF card states it's used for self-repair with test case feedback too.
+    """
+    name = "livecodebench"
+    def __init__(self, language: Language = "python"):
+        self.language = language
+    def iter_tasks(self, split: str = "test") -> Iterable[CodingTask]:
+        """
+        Yield the single toy task that matches ``self.language``.
+        arguments:
+            split:
+                Accepted for interface compatibility; not used by this stub.
+        returns:
+            A generator producing exactly one CodingTask. Any language other
+            than "python" or "cpp" produces the Java task.
+        """
+        # Placeholder: the real implementation should load the HF dataset
+        # (https://huggingface.co/datasets/livecodebench/code_generation_lite)
+        # and map each row to {files} + options for `self.language`.
+        # Each toy solution below subtracts instead of adding (marked BUG in
+        # the Python one), so the accompanying tests fail against it.
+        lang = self.language
+        if lang == "python":
+            py_files = {
+                "solution.py": "def add(a,b): return a - b  # BUG",
+                "tests.py": "from solution import add\n"
+                            "def test_ok(): assert add(1,2)==3\n"
+                            "def test_neg(): assert add(-5,2)==-3\n"
+            }
+            yield CodingTask(language="python", files=py_files, options={})
+            return
+        if lang == "cpp":
+            cpp_files = {
+                "solution.cpp":"int add(int a,int b){return a-b;}",
+                "test_main.cpp":"#include <cassert>\nint add(int,int);\nint main(){assert(add(1,2)==3);assert(add(-5,2)==-3);return 0;}"
+            }
+            yield CodingTask(language="cpp", files=cpp_files, options={"cxx_std":"c++17"})
+            return
+        # Fallback branch: java
+        java_files = {
+            "Solution.java":"public class Solution{public static int add(int a,int b){return a-b;}}",
+            "MainTest.java":"public class MainTest{public static void main(String[]a){"
+                            "if(Solution.add(1,2)!=3)throw new RuntimeException(\"f1\");"
+                            "if(Solution.add(-5,2)!=-3)throw new RuntimeException(\"f2\");}}"
+        }
+        yield CodingTask(language="java", files=java_files, options={"java_main":"MainTest"})

wisent/benchmarks/coding/safe_docker/__init__.py ADDED Viewed

File without changes

wisent/benchmarks/coding/safe_docker/core/__init__.py ADDED Viewed

File without changes

wisent/benchmarks/coding/safe_docker/core/atoms.py ADDED Viewed

@@ -0,0 +1,105 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Protocol, runtime_checkable
+__all__ = ["Job", "Result", "LanguageRecipe", "SandboxExecutor"]
+@dataclass(frozen=True)
+class Job:
+    """How to build + run a submission inside a sandbox.
+    Immutable value object. The three leading fields are required; every
+    limit falls back to the default declared on the field when omitted.
+    attributes:
+        language:
+            Programming language, e.g. "python", "cpp", "java".
+        compile_argv:
+            If not None, argv to compile the code (e.g. ["g++", "-o", "program", "solution.cpp"]).
+            If None, no compilation step is done.
+        run_argv:
+            argv to run the code (e.g. ["./program"] or ["python3", "solution.py"]).
+        cpu_limit_s:
+            CPU time limit in seconds (default 3).
+        wall_timeout_s:
+            Wall clock timeout in seconds (default 8).
+        mem_limit_mb:
+            Memory limit in megabytes (default 4096).
+        fsize_mb:
+            Max file size in megabytes (default 16).
+        nproc:
+            Max number of processes/threads (default 128).
+        nofile:
+            Max number of open files (default 512).
+    example:
+        >>> job = Job(
+        ...     language="python",
+        ...     compile_argv=None,
+        ...     run_argv=["python3", "solution.py"],
+        ...     cpu_limit_s=3,
+        ...     wall_timeout_s=8,
+        ...     mem_limit_mb=4096,
+        ...     fsize_mb=16,
+        ...     nproc=128,
+        ...     nofile=512,
+        ... )
+    """
+    language: str
+    compile_argv: list[str] | None
+    run_argv: list[str]
+    cpu_limit_s: int = 3
+    wall_timeout_s: int = 8
+    mem_limit_mb: int = 4096
+    fsize_mb: int = 16
+    nproc: int = 128
+    nofile: int = 512
+@dataclass(frozen=True)
+class Result:
+    """
+    Result of running a Job inside a sandbox.
+    attributes:
+        status:
+            One of "ok", "compile_error", "runtime_error", "timeout".
+        exit_code:
+            Exit code of the program (or compiler).
+            NOTE(review): the bundled entrypoint reports 124 on a wall-clock
+            timeout and 127 when the executable is missing; confirm whether
+            -1 is still produced anywhere.
+        stdout:
+            Captured standard output.
+            NOTE(review): a 32k-character cap was documented here, but no
+            truncation is visible in the executor code -- confirm.
+        stderr:
+            Captured standard error (same caveat about the documented 32k cap).
+        elapsed:
+            Wall clock time elapsed in seconds (float).
+    example:
+        >>> res = Result(
+        ...     status="ok",
+        ...     exit_code=0,
+        ...     stdout="Hello, world!",
+        ...     stderr="",
+        ...     elapsed=1.23,
+        ... )
+    """
+    status: str
+    exit_code: int
+    stdout: str
+    stderr: str
+    elapsed: float
+@runtime_checkable
+class LanguageRecipe(Protocol):
+    """
+    Knows how to create a Job for a given language.
+    Runtime-checkable structural interface: ``isinstance(obj, LanguageRecipe)``
+    only checks that the members exist, not their signatures.
+    attributes:
+        language:
+            The programming language this recipe supports, e.g. "python", "cpp", "java".
+    """
+    language: str
+    # Builds the Job from free-form keyword options; which keys are honoured
+    # is up to each concrete recipe.
+    def make_job(self, **options) -> Job: ...
+@runtime_checkable
+class SandboxExecutor(Protocol):
+    """
+    Executes a Job inside a sandbox, given the job's files.
+    Runtime-checkable structural interface; implementations only need a
+    compatible ``run`` method.
+    """
+    # `files` maps file name -> file content; the outcome comes back as a Result.
+    def run(self, files: dict[str, str], job: Job) -> Result: ...

wisent/benchmarks/coding/safe_docker/core/runtime.py ADDED Viewed

@@ -0,0 +1,118 @@
+from __future__ import annotations
+import json, os, subprocess, tempfile
+from typing import TYPE_CHECKING
+from wisent.benchmarks.coding.safe_docker.core.atoms import Result, SandboxExecutor
+if TYPE_CHECKING:
+    from wisent.benchmarks.coding.safe_docker.core.atoms import Job
+__all__ = ["DockerSandboxExecutor"]
+# Image used when the caller does not pass one to DockerSandboxExecutor.
+DEFAULT_IMAGE = "coding/sandbox:polyglot-1.0"
+# Hardening options passed to every `docker run`: remove the container on
+# exit, no network, capped process count, read-only root filesystem, no Linux
+# capabilities, and no privilege escalation.
+SAFE_FLAGS = [
+    "--rm", "--network=none",
+    "--pids-limit=256",
+    "--read-only",
+    "--cap-drop=ALL",
+    "--security-opt=no-new-privileges",
+]
+# Writable scratch space on top of the read-only root: /tmp (128 MiB) and
+# /work (256 MiB), both mounted with exec allowed.
+TMPFS_FLAGS = [
+    "--tmpfs", "/tmp:exec,mode=1777,size=134217728",
+    "--tmpfs", "/work:exec,mode=1777,size=268435456",
+]
+class DockerSandboxExecutor(SandboxExecutor):
+    """
+    Executes a Job inside a Docker container.
+    The job files are written to a temporary host directory that is mounted
+    read-only at /job; the image is expected to print one JSON object
+    describing the outcome on stdout.
+    attributes:
+        image:
+            Docker image to run (defaults to DEFAULT_IMAGE).
+        runtime:
+            Optional container runtime name passed to `docker run --runtime`;
+            None keeps Docker's default runtime.
+    """
+    def __init__(self, image: str = DEFAULT_IMAGE, runtime: str | None = None):
+        self.image = image
+        self.runtime = runtime
+    def run(self, files: dict[str, str], job: Job) -> Result:
+        """
+        Runs a Job inside a Docker container, given the job's files.
+        arguments:
+            files:
+                A mapping of relative file name to file content. Names may
+                contain sub-directories but must stay inside the job directory.
+            job:
+                The Job to execute.
+        exceptions:
+            ValueError if a file name is absolute, empty, or escapes the job directory.
+            OSError (e.g. FileNotFoundError) if the `docker` executable cannot be started.
+            A non-zero exit of `docker` itself does NOT raise; it is reported
+            through the returned Result.
+        returns:
+            A Result built from the JSON payload printed by the container, or
+            a "runtime_error" Result carrying docker's raw stdout/stderr when
+            that output is not a JSON object.
+        example (python add function; needs a running Docker daemon and the sandbox image)
+        >>> from wisent.benchmarks.coding.safe_docker.core.atoms import Job, Result
+        >>> from wisent.benchmarks.coding.safe_docker.core.runtime import DockerSandboxExecutor
+        >>> job = Job(
+        ...     language="python",
+        ...     compile_argv=None,
+        ...     run_argv=["python3", "/job/tests.py"],
+        ...     cpu_limit_s=2,
+        ...     wall_timeout_s=5,
+        ...     mem_limit_mb=256,
+        ... )
+        >>> files = {
+        ...     "solution.py": "def add(a,b): return a + b",
+        ...     "tests.py": "from solution import add\\nassert add(1,2)==3",
+        ... }
+        >>> res: Result = DockerSandboxExecutor().run(files, job)  # doctest: +SKIP
+        >>> res.status  # doctest: +SKIP
+        'ok'
+        """
+        with tempfile.TemporaryDirectory() as tmp:
+            job_dir = os.path.normpath(os.path.join(tmp, "job"))
+            os.makedirs(job_dir, exist_ok=True)
+            for name, content in files.items():
+                path = os.path.normpath(os.path.join(job_dir, name))
+                # Refuse absolute names and ".." tricks: nothing may be written
+                # outside the directory that gets mounted into the container.
+                if path == job_dir or os.path.commonpath([job_dir, path]) != job_dir:
+                    raise ValueError(f"Unsafe job file name: {name!r}")
+                # Allow nested names such as "pkg/mod.py".
+                os.makedirs(os.path.dirname(path), exist_ok=True)
+                with open(path, "w", encoding="utf-8") as f:
+                    f.write(content)
+            with open(os.path.join(job_dir, "job.json"), "w", encoding="utf-8") as f:
+                json.dump({
+                    "language": job.language,
+                    "compile": {"argv": job.compile_argv} if job.compile_argv else None,
+                    "run": {"argv": job.run_argv},
+                    "cpu_limit_s": job.cpu_limit_s,
+                    "wall_timeout_s": job.wall_timeout_s,
+                    "mem_limit_mb": job.mem_limit_mb,
+                    "fsize_mb": job.fsize_mb,
+                    "nproc": job.nproc,
+                    "nofile": job.nofile,
+                }, f)
+            cmd = ["docker", "run", "-i", *SAFE_FLAGS, *TMPFS_FLAGS]
+            if self.runtime:
+                # --runtime is an option of `docker run`, so it has to come
+                # after the subcommand, not before it.
+                cmd += ["--runtime", self.runtime]
+            cmd += ["-v", f"{job_dir}:/job:ro", self.image]
+            p = subprocess.run(cmd, check=False, capture_output=True, text=True)
+            out = (p.stdout or "").strip()
+            try:
+                payload = json.loads(out)
+            except json.JSONDecodeError:
+                payload = None
+            if not isinstance(payload, dict):
+                # Covers empty output, non-JSON output and JSON that is not an
+                # object (e.g. a bare number), none of which the code below can read.
+                return Result(
+                    status="runtime_error",
+                    exit_code=p.returncode,
+                    stdout=p.stdout or "",
+                    stderr=p.stderr or "Failed to parse executor output as JSON.",
+                    elapsed=0.0,
+                )
+            return Result(
+                status=payload.get("status","runtime_error"),
+                exit_code=int(payload.get("exit_code", p.returncode)),
+                stdout=payload.get("stdout",""),
+                stderr=payload.get("stderr",""),
+                elapsed=float(payload.get("elapsed",0.0)),
+            )

wisent/benchmarks/coding/safe_docker/entrypoint.py ADDED Viewed

@@ -0,0 +1,123 @@
+from __future__ import annotations
+import json, os, shutil, subprocess, sys, time, signal, resource
+from wisent.benchmarks.coding.safe_docker.core.atoms import Job
+JOB_FILE = "/job/job.json"
+WORKDIR = "/work"
+def set_limits(job: Job):
+    """
+    Set resource limits for the sandboxed process.
+    Intended to run in the child between fork and exec (it is passed as
+    ``preexec_fn`` by ``run``), so the limits apply to the command only.
+    attributes:
+        job:
+            The job description. NOTE(review): despite the ``Job`` annotation
+            it is indexed like a mapping here, and ``main`` passes the dict
+            parsed from job.json.
+    """
+    # Each limit is set with soft == hard, so the child cannot raise it again.
+    resource.setrlimit(resource.RLIMIT_CPU,  (job["cpu_limit_s"],)*2)
+    # Megabyte settings are converted to bytes.
+    resource.setrlimit(resource.RLIMIT_AS,   (job["mem_limit_mb"]*1024*1024,)*2)
+    resource.setrlimit(resource.RLIMIT_FSIZE,(job["fsize_mb"]*1024*1024,)*2)
+    resource.setrlimit(resource.RLIMIT_NPROC,(job["nproc"],)*2)
+    resource.setrlimit(resource.RLIMIT_NOFILE,(job["nofile"],)*2)
+    # No core dumps.
+    resource.setrlimit(resource.RLIMIT_CORE,(0,0))
+    # New session => the child leads its own process group, which lets the
+    # timeout handler in `run` kill the whole group with os.killpg(p.pid, ...).
+    os.setsid()
+def run(argv: list[str], job: dict) -> tuple[int,str,str,float,str]:
+    """
+    Run a command in a subprocess with resource limits.
+    attributes:
+        argv:
+            Command and arguments to run as a list of strings.
+        job:
+            The job description as a mapping (the dict parsed from job.json);
+            supplies "wall_timeout_s" here and the limit keys read by set_limits.
+    returns:
+        A tuple containing:
+            - exit code (int): the command's exit code, 124 on timeout,
+              127 if the executable was not found, 1 on any other failure
+            - standard output (str): empty on timeout or failure to start
+            - standard error (str)
+            - elapsed time in seconds (float)
+            - status (str): "ok", "nonzero", "timeout", "missing", or "error"
+    example:
+            >>> code, out, err, elapsed, status = run(["python3", "solution.py"], job)
+            >>> print(status)
+            ok
+    """
+    # Monotonic clock: the measured duration cannot be skewed by clock adjustments.
+    start = time.monotonic()
+    try:
+        p = subprocess.Popen(argv, cwd=WORKDIR, text=True,
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                             preexec_fn=lambda: set_limits(job))
+        try:
+            out, err = p.communicate(timeout=job["wall_timeout_s"])
+        except subprocess.TimeoutExpired:
+            try:
+                # set_limits() made the child a process-group leader, so this
+                # also takes down anything it spawned.
+                os.killpg(p.pid, signal.SIGKILL)
+            except OSError:
+                # Group already gone or not killable: fall back to the child itself.
+                p.kill()
+            try:
+                # Reap the child and drain its pipes so no zombie or open
+                # descriptor is left behind. Bounded, in case a descendant
+                # that left the process group still holds a pipe open.
+                p.communicate(timeout=5)
+            except subprocess.TimeoutExpired:
+                pass
+            return 124, "", f"Time limit exceeded ({job['wall_timeout_s']}s)\n", time.monotonic()-start, "timeout"
+        status = "ok" if p.returncode == 0 else "nonzero"
+        return p.returncode, out, err, time.monotonic()-start, status
+    except FileNotFoundError as e:
+        return 127, "", f"{e}\n", time.monotonic()-start, "missing"
+    except Exception as e:
+        # Best-effort boundary: any other failure is reported, not raised.
+        return 1, "", f"{e}\n", time.monotonic()-start, "error"
+def copy_job():
+    """
+    Mirror the /job directory tree into the work directory.
+    Sub-directories are recreated under WORKDIR and every file is copied
+    with shutil.copy2 (content plus metadata).
+    """
+    os.makedirs(WORKDIR, exist_ok=True)
+    for src_dir, _subdirs, names in os.walk("/job"):
+        rel_dir = os.path.relpath(src_dir, "/job")
+        # The top level of /job maps onto WORKDIR itself.
+        sub = "" if rel_dir == "." else rel_dir
+        dst_dir = os.path.join(WORKDIR, sub)
+        os.makedirs(dst_dir, exist_ok=True)
+        for name in names:
+            shutil.copy2(os.path.join(src_dir, name), os.path.join(dst_dir, name))
+def main():
+    """
+    Main function to execute the job defined in /job/job.json.
+    Copies the job files to the work directory, optionally syntax-checks and
+    compiles the submission, runs it, and prints exactly one JSON object
+    (status, stdout, stderr, elapsed, exit_code) on stdout.
+    returns:
+        Exit code 0 whenever a JSON result was printed (even for failing
+        submissions), 2 if the job file is missing.
+    """
+    if not os.path.exists(JOB_FILE):
+        print("Missing /job/job.json", file=sys.stderr)
+        return 2
+    with open(JOB_FILE, "r", encoding="utf-8") as f:
+        job = json.load(f)
+    copy_job()
+    # optional quick syntax check for Python
+    if job["language"] == "python":
+        code, out, err, el, _ = run([sys.executable, "-m", "py_compile", "solution.py"], job)
+        # Decide by exit code, not by stderr: warnings such as SyntaxWarning
+        # are written to stderr for code that compiles fine.
+        if code != 0:
+            print(json.dumps({"status":"compile_error","stdout":out,"stderr":err,"elapsed":el,"exit_code":code}))
+            return 0
+    if job.get("compile"):
+        code, out, err, el, _ = run(job["compile"]["argv"], job)
+        if code != 0:
+            print(json.dumps({"status":"compile_error","stdout":out,"stderr":err,"elapsed":el,"exit_code":code}))
+            return 0
+    code, out, err, el, status = run(job["run"]["argv"], job)
+    if code == 0:
+        outcome = "ok"
+    elif status == "timeout":
+        outcome = "timeout"
+    else:
+        # "nonzero", "missing" and "error" are all reported as runtime_error.
+        outcome = "runtime_error"
+    payload = {
+        "status": outcome,
+        "stdout": out, "stderr": err, "elapsed": el, "exit_code": code
+    }
+    print(json.dumps(payload))
+    return 0
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())

wisent/benchmarks/coding/safe_docker/recipes.py ADDED Viewed

@@ -0,0 +1,60 @@
+from __future__ import annotations
+from typing import Dict
+from wisent.benchmarks.coding.safe_docker.core.atoms import Job, LanguageRecipe
+class PythonRecipe(LanguageRecipe):
+    """
+    Recipe that runs a Python submission's ``tests.py`` under pytest.
+    """
+    language = "python"
+    def make_job(self, **options) -> Job:
+        """
+        Build the pytest Job; there is no compile step.
+        Recognised options: "cpu_limit_s" (default 3), "time_limit_s"
+        (wall-clock timeout, default 8) and "mem_limit_mb" (default 768).
+        """
+        pytest_argv = ["pytest", "-q", "--maxfail=1", "--tb=short", "-rA", "tests.py"]
+        limits = {
+            "cpu_limit_s": options.get("cpu_limit_s",3),
+            # The option is named "time_limit_s" but feeds Job.wall_timeout_s.
+            "wall_timeout_s": options.get("time_limit_s",8),
+            "mem_limit_mb": options.get("mem_limit_mb",768),
+        }
+        return Job(language="python", compile_argv=None, run_argv=pytest_argv, **limits)
+class CppRecipe(LanguageRecipe):
+    """
+    Recipe that compiles ``solution.cpp`` + ``test_main.cpp`` with g++ and
+    runs the resulting ``./program``.
+    """
+    language = "cpp"
+    def make_job(self, **options) -> Job:
+        """
+        Build the compile-and-run Job for a C++ submission.
+        Recognised options: "cxx_std" (default "c++17"), "cpu_limit_s"
+        (default 3), "time_limit_s" (wall-clock timeout, default 8) and
+        "mem_limit_mb" (default 768).
+        """
+        std = options.get("cxx_std", "c++17")
+        # Both steps go through a login shell (`bash -lc`).
+        build_argv = ["bash","-lc", f"g++ -std={std} -O2 -pipe -o program solution.cpp test_main.cpp"]
+        exec_argv = ["bash","-lc","./program"]
+        limits = {
+            "cpu_limit_s": options.get("cpu_limit_s",3),
+            # The option is named "time_limit_s" but feeds Job.wall_timeout_s.
+            "wall_timeout_s": options.get("time_limit_s",8),
+            "mem_limit_mb": options.get("mem_limit_mb",768),
+        }
+        return Job(language="cpp", compile_argv=build_argv, run_argv=exec_argv, **limits)
+class JavaRecipe(LanguageRecipe):
+    """
+    Recipe that compiles every ``*.java`` file with javac and runs the main
+    class on a memory-constrained JVM.
+    Derives from LanguageRecipe explicitly, like the other recipes.
+    """
+    language = "java"
+    def make_job(self, **options) -> Job:
+        """
+        Build the compile-and-run Job for a Java submission.
+        Recognised options: "java_main" (class to run, default "MainTest"),
+        "java_opts" (JVM flags; the default keeps heap, metaspace, code cache
+        and direct memory small), "cpu_limit_s" (default 3), "time_limit_s"
+        (wall-clock timeout, default 8) and "mem_limit_mb" (default 768).
+        """
+        main = options.get("java_main", "MainTest")
+        java_opts = options.get(
+            "java_opts",
+            "-Xms32m -Xmx256m -Xss512k "
+            "-XX:CompressedClassSpaceSize=64m "
+            "-XX:MaxMetaspaceSize=128m "
+            "-XX:ReservedCodeCacheSize=64m "
+            "-XX:MaxDirectMemorySize=64m "
+            "-XX:+UseSerialGC -XX:+ExitOnOutOfMemoryError"
+        )
+        # Both steps go through a login shell (`bash -lc`); the shell expands *.java.
+        compile_cmd = ["bash", "-lc", "javac *.java"]
+        run_cmd = ["bash", "-lc", f"java {java_opts} {main}"]
+        return Job(
+            language="java",
+            compile_argv=compile_cmd,
+            run_argv=run_cmd,
+            cpu_limit_s=options.get("cpu_limit_s", 3),
+            # The option is named "time_limit_s" but feeds Job.wall_timeout_s.
+            wall_timeout_s=options.get("time_limit_s", 8),
+            mem_limit_mb=options.get("mem_limit_mb", 768),
+        )
+# Language name -> shared recipe instance, created once at import time.
+RECIPE_REGISTRY = {
+    "python": PythonRecipe(),
+    "cpp": CppRecipe(),
+    "java": JavaRecipe(),
+}

wisent/classifiers/__init__.py ADDED Viewed

File without changes

wisent/classifiers/core/__init__.py ADDED Viewed

File without changes

wisent 0.1.1__py3-none-any.whl → 0.5.2__py3-none-any.whl

Potentially problematic release.

wisent-0.1.1-py3-none-any.whl → wisent-0.5.2-py3-none-any.whl