PyPI - themis-eval - Versions diffs - 0.2.1__tar.gz → 0.2.2__tar.gz - Mend

themis-eval 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166) hide show

{themis_eval-0.2.1/themis_eval.egg-info → themis_eval-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: themis-eval
-Version: 0.2.1
+Version: 0.2.2
 Summary: Lightweight evaluation platform for LLM experiments
 Author: Pittawat Taveekitworachai
 License: MIT
@@ -25,6 +25,7 @@ Requires-Dist: tabulate>=0.9.0
 Requires-Dist: tenacity>=9.1.2
 Requires-Dist: plotly>=6.5.0
 Requires-Dist: math-verify>=0.8.0
+Requires-Dist: rich>=14.2.0
 Provides-Extra: dev
 Requires-Dist: pytest>=8.0; extra == "dev"
 Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
@@ -358,9 +359,9 @@ Themis is built on a clean, modular architecture:
 - **[API Reference](docs/index.md)** - Detailed API documentation
 - **[Examples](examples-simple/)** - Runnable code examples
-- **[Extending Backends](docs/EXTENDING_BACKENDS.md)** - Custom storage and execution
-- **[API Server](docs/API_SERVER.md)** - Web dashboard and REST API
-- **[Comparison Engine](docs/COMPARISON.md)** - Statistical testing guide
+- **[Extending Backends](docs/customization/backends.md)** - Custom storage and execution
+- **[API Server](docs/reference/api-server.md)** - Web dashboard and REST API
+- **[Comparison Engine](docs/guides/comparison.md)** - Statistical testing guide
 ---
@@ -388,7 +389,7 @@ result = evaluate(
 )
 ```
-See [EXTENDING_BACKENDS.md](docs/EXTENDING_BACKENDS.md) for details.
+See [docs/customization/backends.md](docs/customization/backends.md) for details.
 ### Distributed Execution

{themis_eval-0.2.1 → themis_eval-0.2.2}/README.md RENAMED Viewed

@@ -300,9 +300,9 @@ Themis is built on a clean, modular architecture:
 - **[API Reference](docs/index.md)** - Detailed API documentation
 - **[Examples](examples-simple/)** - Runnable code examples
-- **[Extending Backends](docs/EXTENDING_BACKENDS.md)** - Custom storage and execution
-- **[API Server](docs/API_SERVER.md)** - Web dashboard and REST API
-- **[Comparison Engine](docs/COMPARISON.md)** - Statistical testing guide
+- **[Extending Backends](docs/customization/backends.md)** - Custom storage and execution
+- **[API Server](docs/reference/api-server.md)** - Web dashboard and REST API
+- **[Comparison Engine](docs/guides/comparison.md)** - Statistical testing guide
 ---
@@ -330,7 +330,7 @@ result = evaluate(
 )
 ```
-See [EXTENDING_BACKENDS.md](docs/EXTENDING_BACKENDS.md) for details.
+See [docs/customization/backends.md](docs/customization/backends.md) for details.
 ### Distributed Execution

{themis_eval-0.2.1 → themis_eval-0.2.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "themis-eval"
-version = "0.2.1"
+version = "0.2.2"
 description = "Lightweight evaluation platform for LLM experiments"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -32,6 +32,7 @@ dependencies = [
     "tenacity>=9.1.2",
     "plotly>=6.5.0",
     "math-verify>=0.8.0",
+    "rich>=14.2.0",
 ]
 [tool.setuptools.packages.find]

themis_eval-0.2.2/themis/__init__.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Themis experiment platform - Dead simple LLM evaluation.
+The primary interface is the `evaluate()` function:
+    import themis
+    report = themis.evaluate("math500", model="gpt-4", limit=100)
+Extension APIs for registering custom components:
+    - themis.register_metric() - Register custom metrics
+    - themis.register_dataset() - Register custom datasets
+    - themis.register_provider() - Register custom model providers
+    - themis.register_benchmark() - Register custom benchmark presets
+"""
+from themis import config, core, evaluation, experiment, generation, project
+from themis._version import __version__
+from themis.api import evaluate, get_registered_metrics, register_metric
+from themis.datasets import register_dataset, list_datasets, is_dataset_registered
+from themis.presets import register_benchmark, list_benchmarks, get_benchmark_preset
+from themis.providers import register_provider
+__all__ = [
+    # Main API
+    "evaluate",
+    # Metrics
+    "register_metric",
+    "get_registered_metrics",
+    # Datasets
+    "register_dataset",
+    "list_datasets",
+    "is_dataset_registered",
+    # Benchmarks
+    "register_benchmark",
+    "list_benchmarks",
+    "get_benchmark_preset",
+    # Providers
+    "register_provider",
+    # Submodules
+    "config",
+    "core",
+    "evaluation",
+    "experiment",
+    "generation",
+    "project",
+    # Version
+    "__version__",
+]

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/_version.py RENAMED Viewed

@@ -9,7 +9,7 @@ def _detect_version() -> str:
     try:
         return metadata.version("themis-eval")
     except metadata.PackageNotFoundError:  # pragma: no cover - local dev only
-        return "0.2.1"  # Fallback for development
+        return "0.2.2"  # Fallback for development
 __version__ = _detect_version()

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/api.py RENAMED Viewed

@@ -66,6 +66,55 @@ except ImportError:
 logger = logging.getLogger(__name__)
+# Module-level metrics registry for custom metrics
+_METRICS_REGISTRY: dict[str, type] = {}
+def register_metric(name: str, metric_cls: type) -> None:
+    """Register a custom metric for use in evaluate().
+    This allows users to add their own metrics to Themis without modifying
+    the source code. Registered metrics can be used by passing their names
+    to the `metrics` parameter in evaluate().
+    Args:
+        name: Metric name (used in evaluate(metrics=[name]))
+        metric_cls: Metric class implementing the Metric interface.
+            Must have a compute() method that takes prediction, references,
+            and metadata parameters.
+    Raises:
+        TypeError: If metric_cls is not a class
+        ValueError: If metric_cls doesn't implement the required interface
+    Example:
+        >>> from themis.evaluation.metrics import MyCustomMetric
+        >>> themis.register_metric("my_metric", MyCustomMetric)
+        >>> report = themis.evaluate("math500", model="gpt-4", metrics=["my_metric"])
+    """
+    if not isinstance(metric_cls, type):
+        raise TypeError(f"metric_cls must be a class, got {type(metric_cls)}")
+    # Validate that it implements the Metric interface
+    if not hasattr(metric_cls, "compute"):
+        raise ValueError(
+            f"{metric_cls.__name__} must implement compute() method. "
+            f"See themis.evaluation.metrics for examples."
+        )
+    _METRICS_REGISTRY[name] = metric_cls
+    logger.info(f"Registered custom metric: {name} -> {metric_cls.__name__}")
+def get_registered_metrics() -> dict[str, type]:
+    """Get all currently registered custom metrics.
+    Returns:
+        Dictionary mapping metric names to their classes
+    """
+    return _METRICS_REGISTRY.copy()
 def evaluate(
     benchmark_or_dataset: str | Sequence[dict[str, Any]],
     *,
@@ -384,8 +433,8 @@ def _resolve_metrics(metric_names: list[str]) -> list:
     except ImportError:
         nlp_available = False
-    # Metric registry
-    METRICS_REGISTRY = {
+    # Built-in metrics registry
+    BUILTIN_METRICS = {
         # Core metrics
         "exact_match": ExactMatch,
         "math_verify": MathVerifyAccuracy,
@@ -394,7 +443,7 @@ def _resolve_metrics(metric_names: list[str]) -> list:
     # Add NLP metrics if available
     if nlp_available:
-        METRICS_REGISTRY.update({
+        BUILTIN_METRICS.update({
             "bleu": BLEU,
             "rouge1": lambda: ROUGE(variant=ROUGEVariant.ROUGE_1),
             "rouge2": lambda: ROUGE(variant=ROUGEVariant.ROUGE_2),
@@ -407,6 +456,10 @@ def _resolve_metrics(metric_names: list[str]) -> list:
     # "pass_at_k": PassAtK,
     # "codebleu": CodeBLEU,
+    # Merge built-in and custom metrics
+    # Custom metrics can override built-in metrics
+    METRICS_REGISTRY = {**BUILTIN_METRICS, **_METRICS_REGISTRY}
     metrics = []
     for name in metric_names:
         if name not in METRICS_REGISTRY:
@@ -426,4 +479,4 @@ def _resolve_metrics(metric_names: list[str]) -> list:
     return metrics
-__all__ = ["evaluate"]
+__all__ = ["evaluate", "register_metric", "get_registered_metrics"]

themis_eval-0.2.2/themis/presets/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Preset configurations for common benchmarks and models.
+This module provides automatic configuration for popular benchmarks,
+eliminating the need for manual setup of prompts, metrics, and extractors.
+"""
+from themis.presets.benchmarks import (
+    BenchmarkPreset,
+    get_benchmark_preset,
+    list_benchmarks,
+    register_benchmark,
+)
+from themis.presets.models import parse_model_name
+__all__ = [
+    "BenchmarkPreset",
+    "register_benchmark",
+    "get_benchmark_preset",
+    "list_benchmarks",
+    "parse_model_name",
+]

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/utils/logging_utils.py RENAMED Viewed

@@ -5,6 +5,9 @@ from __future__ import annotations
 import logging
 from typing import Mapping
+from rich.logging import RichHandler
+from rich.traceback import install as install_rich_traceback
 TRACE_LEVEL = 5
 logging.addLevelName(TRACE_LEVEL, "TRACE")
@@ -28,12 +31,14 @@ _LEVELS: Mapping[str, int] = {
 def configure_logging(level: str = "info") -> None:
     """Configure root logging with human-friendly formatting."""
+    install_rich_traceback()
     numeric_level = _LEVELS.get(level.lower(), logging.INFO)
     logging.basicConfig(
         level=numeric_level,
-        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
-        datefmt="%H:%M:%S",
+        format="%(message)s",
+        datefmt="[%X]",
+        handlers=[RichHandler(rich_tracebacks=True, markup=True)],
         force=True,
     )

themis_eval-0.2.2/themis/utils/progress.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""Simple CLI-friendly progress reporter."""
+from __future__ import annotations
+from contextlib import AbstractContextManager
+from typing import Any, Callable
+from rich.progress import (
+    BarColumn,
+    MofNCompleteColumn,
+    Progress,
+    SpinnerColumn,
+    TaskProgressColumn,
+    TextColumn,
+    TimeElapsedColumn,
+    TimeRemainingColumn,
+)
+class ProgressReporter(AbstractContextManager["ProgressReporter"]):
+    def __init__(
+        self,
+        *,
+        total: int | None,
+        description: str = "Processing",
+        unit: str = "sample",
+        leave: bool = False,
+    ) -> None:
+        self._total = total
+        self._description = description
+        self._unit = unit
+        self._leave = leave
+        self._progress: Progress | None = None
+        self._task_id = None
+    def __enter__(self) -> "ProgressReporter":
+        self.start()
+        return self
+    def __exit__(self, *_exc) -> None:
+        self.close()
+    def start(self) -> None:
+        if self._progress is None:
+            self._progress = Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                MofNCompleteColumn(),
+                TimeElapsedColumn(),
+                TimeRemainingColumn(),
+                transient=not self._leave,
+            )
+            self._progress.start()
+            self._task_id = self._progress.add_task(
+                self._description, total=self._total
+            )
+    def close(self) -> None:
+        if self._progress is not None:
+            self._progress.stop()
+            self._progress = None
+            self._task_id = None
+    def increment(self, step: int = 1) -> None:
+        if self._progress is not None and self._task_id is not None:
+            self._progress.update(self._task_id, advance=step)
+    def on_result(self, _record: Any) -> None:
+        self.increment()
+    def as_callback(self) -> Callable[[Any], None]:
+        return self.on_result
+__all__ = ["ProgressReporter"]

{themis_eval-0.2.1 → themis_eval-0.2.2/themis_eval.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: themis-eval
-Version: 0.2.1
+Version: 0.2.2
 Summary: Lightweight evaluation platform for LLM experiments
 Author: Pittawat Taveekitworachai
 License: MIT
@@ -25,6 +25,7 @@ Requires-Dist: tabulate>=0.9.0
 Requires-Dist: tenacity>=9.1.2
 Requires-Dist: plotly>=6.5.0
 Requires-Dist: math-verify>=0.8.0
+Requires-Dist: rich>=14.2.0
 Provides-Extra: dev
 Requires-Dist: pytest>=8.0; extra == "dev"
 Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
@@ -358,9 +359,9 @@ Themis is built on a clean, modular architecture:
 - **[API Reference](docs/index.md)** - Detailed API documentation
 - **[Examples](examples-simple/)** - Runnable code examples
-- **[Extending Backends](docs/EXTENDING_BACKENDS.md)** - Custom storage and execution
-- **[API Server](docs/API_SERVER.md)** - Web dashboard and REST API
-- **[Comparison Engine](docs/COMPARISON.md)** - Statistical testing guide
+- **[Extending Backends](docs/customization/backends.md)** - Custom storage and execution
+- **[API Server](docs/reference/api-server.md)** - Web dashboard and REST API
+- **[Comparison Engine](docs/guides/comparison.md)** - Statistical testing guide
 ---
@@ -388,7 +389,7 @@ result = evaluate(
 )
 ```
-See [EXTENDING_BACKENDS.md](docs/EXTENDING_BACKENDS.md) for details.
+See [docs/customization/backends.md](docs/customization/backends.md) for details.
 ### Distributed Execution

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis_eval.egg-info/requires.txt RENAMED Viewed

@@ -8,6 +8,7 @@ tabulate>=0.9.0
 tenacity>=9.1.2
 plotly>=6.5.0
 math-verify>=0.8.0
+rich>=14.2.0
 [all]
 themis-eval[code,docs,math,nlp,server,viz]

themis_eval-0.2.1/themis/__init__.py DELETED Viewed

@@ -1,25 +0,0 @@
-"""Themis experiment platform - Dead simple LLM evaluation.
-The primary interface is the `evaluate()` function:
-    import themis
-    report = themis.evaluate("math500", model="gpt-4", limit=100)
-"""
-from themis import config, core, evaluation, experiment, generation, project
-from themis._version import __version__
-from themis.api import evaluate
-__all__ = [
-    # Main API
-    "evaluate",
-    # Submodules
-    "config",
-    "core",
-    "evaluation",
-    "experiment",
-    "generation",
-    "project",
-    # Version
-    "__version__",
-]

themis_eval-0.2.1/themis/presets/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-"""Preset configurations for common benchmarks and models.
-This module provides automatic configuration for popular benchmarks,
-eliminating the need for manual setup of prompts, metrics, and extractors.
-"""
-from themis.presets.benchmarks import get_benchmark_preset, list_benchmarks
-from themis.presets.models import parse_model_name
-__all__ = ["get_benchmark_preset", "list_benchmarks", "parse_model_name"]

themis_eval-0.2.1/themis/utils/progress.py DELETED Viewed

@@ -1,58 +0,0 @@
-"""Simple CLI-friendly progress reporter."""
-from __future__ import annotations
-from contextlib import AbstractContextManager
-from typing import Any, Callable
-from tqdm import tqdm
-class ProgressReporter(AbstractContextManager["ProgressReporter"]):
-    def __init__(
-        self,
-        *,
-        total: int | None,
-        description: str = "Processing",
-        unit: str = "sample",
-        leave: bool = False,
-    ) -> None:
-        self._total = total
-        self._description = description
-        self._unit = unit
-        self._leave = leave
-        self._pbar: tqdm | None = None
-    def __enter__(self) -> "ProgressReporter":
-        self.start()
-        return self
-    def __exit__(self, *_exc) -> None:
-        self.close()
-    def start(self) -> None:
-        if self._pbar is None:
-            self._pbar = tqdm(
-                total=self._total,
-                desc=self._description,
-                unit=self._unit,
-                leave=self._leave,
-            )
-    def close(self) -> None:
-        if self._pbar is not None:
-            self._pbar.close()
-            self._pbar = None
-    def increment(self, step: int = 1) -> None:
-        if self._pbar is not None:
-            self._pbar.update(step)
-    def on_result(self, _record: Any) -> None:
-        self.increment()
-    def as_callback(self) -> Callable[[Any], None]:
-        return self.on_result
-__all__ = ["ProgressReporter"]

{themis_eval-0.2.1 → themis_eval-0.2.2}/LICENSE RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/setup.cfg RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/tests/test_package_metadata.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/backends/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/backends/execution.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/backends/storage.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/__main__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/benchmarks.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/comparison.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/config_commands.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/cost.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/demo.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/info.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/leaderboard.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/math_benchmarks.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/mcq_benchmarks.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/results.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/sample_run.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/commands/visualize.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/main.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/new_project.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/cli/utils.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/comparison/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/comparison/engine.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/comparison/reports.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/comparison/statistics.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/config/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/config/loader.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/config/registry.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/config/runtime.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/config/schema.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/core/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/core/conversation.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/core/entities.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/core/serialization.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/core/tools.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/core/types.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/base.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/commonsense_qa.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/competition_math.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/coqa.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/gpqa.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/gsm8k.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/gsm_symbolic.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/math500.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/med_qa.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/medmcqa.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/mmlu_pro.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/piqa.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/registry.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/schema.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/sciq.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/social_i_qa.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/datasets/super_gpqa.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/conditional.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/extractors/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/extractors/error_taxonomy_extractor.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/extractors/exceptions.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/extractors/identity_extractor.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/extractors/json_field_extractor.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/extractors/math_verify_extractor.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/extractors/regex_extractor.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/math_verify_utils.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/code/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/code/codebleu.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/code/execution.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/code/pass_at_k.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/composite_metric.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/consistency_metric.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/exact_match.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/length_difference_tolerance.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/math_verify_accuracy.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/nlp/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/nlp/bertscore.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/nlp/bleu.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/nlp/meteor.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/nlp/rouge.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/pairwise_judge_metric.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/response_length.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/metrics/rubric_judge_metric.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/pipeline.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/pipelines/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/pipelines/composable_pipeline.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/pipelines/standard_pipeline.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/reports.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/statistics/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/statistics/bootstrap.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/statistics/confidence_intervals.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/statistics/distributions.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/statistics/effect_sizes.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/statistics/hypothesis_tests.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/statistics/types.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/strategies/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/strategies/attempt_aware_evaluation_strategy.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/strategies/default_evaluation_strategy.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/strategies/evaluation_strategy.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/evaluation/strategies/judge_evaluation_strategy.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/builder.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/cache_manager.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/comparison.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/cost.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/definitions.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/export.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/export_csv.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/integration_manager.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/math.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/mcq.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/orchestrator.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/pricing.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/storage.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/experiment/visualization.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/agentic_runner.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/batching.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/clients.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/conversation_runner.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/plan.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/providers/litellm_provider.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/providers/vllm_provider.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/router.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/runner.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/strategies.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/templates.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/turn_strategies.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/generation/types.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/integrations/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/integrations/huggingface.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/integrations/wandb.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/interfaces/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/presets/benchmarks.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/presets/models.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/project/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/project/definitions.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/project/patterns.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/providers/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/providers/registry.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/py.typed RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/server/__init__.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/server/app.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/utils/api_generator.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/utils/cost_tracking.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/utils/dashboard.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis/utils/tracing.py RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis_eval.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis_eval.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{themis_eval-0.2.1 → themis_eval-0.2.2}/themis_eval.egg-info/top_level.txt RENAMED Viewed

File without changes

themis-eval 0.2.1__tar.gz → 0.2.2__tar.gz

themis-eval 0.2.1__tar.gz → 0.2.2__tar.gz