wisent 0.1.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of wisent might be problematic.
Files changed (237)
  1. wisent/__init__.py +1 -8
  2. wisent/benchmarks/__init__.py +0 -0
  3. wisent/benchmarks/coding/__init__.py +0 -0
  4. wisent/benchmarks/coding/metrics/__init__.py +0 -0
  5. wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
  6. wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
  7. wisent/benchmarks/coding/metrics/evaluator.py +275 -0
  8. wisent/benchmarks/coding/metrics/passk.py +66 -0
  9. wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
  10. wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
  11. wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
  12. wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
  13. wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
  14. wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
  15. wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
  16. wisent/benchmarks/coding/providers/__init__.py +18 -0
  17. wisent/benchmarks/coding/providers/core/__init__.py +0 -0
  18. wisent/benchmarks/coding/providers/core/atoms.py +31 -0
  19. wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
  20. wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
  21. wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
  22. wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
  23. wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
  24. wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
  25. wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
  26. wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
  27. wisent/classifiers/__init__.py +0 -0
  28. wisent/classifiers/core/__init__.py +0 -0
  29. wisent/classifiers/core/atoms.py +747 -0
  30. wisent/classifiers/models/__init__.py +0 -0
  31. wisent/classifiers/models/logistic.py +29 -0
  32. wisent/classifiers/models/mlp.py +47 -0
  33. wisent/cli/__init__.py +0 -0
  34. wisent/cli/classifiers/__init__.py +0 -0
  35. wisent/cli/classifiers/classifier_rotator.py +137 -0
  36. wisent/cli/cli_logger.py +142 -0
  37. wisent/cli/data_loaders/__init__.py +0 -0
  38. wisent/cli/data_loaders/data_loader_rotator.py +96 -0
  39. wisent/cli/evaluators/__init__.py +0 -0
  40. wisent/cli/evaluators/evaluator_rotator.py +148 -0
  41. wisent/cli/steering_methods/__init__.py +0 -0
  42. wisent/cli/steering_methods/steering_rotator.py +110 -0
  43. wisent/cli/wisent_cli/__init__.py +0 -0
  44. wisent/cli/wisent_cli/commands/__init__.py +0 -0
  45. wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
  46. wisent/cli/wisent_cli/commands/listing.py +154 -0
  47. wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
  48. wisent/cli/wisent_cli/main.py +93 -0
  49. wisent/cli/wisent_cli/shell.py +80 -0
  50. wisent/cli/wisent_cli/ui.py +69 -0
  51. wisent/cli/wisent_cli/util/__init__.py +0 -0
  52. wisent/cli/wisent_cli/util/aggregations.py +43 -0
  53. wisent/cli/wisent_cli/util/parsing.py +126 -0
  54. wisent/cli/wisent_cli/version.py +4 -0
  55. wisent/core/__init__.py +27 -0
  56. wisent/core/activations/__init__.py +0 -0
  57. wisent/core/activations/activations_collector.py +338 -0
  58. wisent/core/activations/core/__init__.py +0 -0
  59. wisent/core/activations/core/atoms.py +216 -0
  60. wisent/core/agent/__init__.py +18 -0
  61. wisent/core/agent/budget.py +638 -0
  62. wisent/core/agent/device_benchmarks.py +685 -0
  63. wisent/core/agent/diagnose/__init__.py +55 -0
  64. wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
  65. wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
  66. wisent/core/agent/diagnose/create_classifier.py +1154 -0
  67. wisent/core/agent/diagnose/response_diagnostics.py +268 -0
  68. wisent/core/agent/diagnose/select_classifiers.py +506 -0
  69. wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
  70. wisent/core/agent/diagnose/tasks/__init__.py +33 -0
  71. wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
  72. wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
  73. wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
  74. wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
  75. wisent/core/agent/diagnose.py +242 -0
  76. wisent/core/agent/steer.py +212 -0
  77. wisent/core/agent/timeout.py +134 -0
  78. wisent/core/autonomous_agent.py +1234 -0
  79. wisent/core/bigcode_integration.py +583 -0
  80. wisent/core/contrastive_pairs/__init__.py +15 -0
  81. wisent/core/contrastive_pairs/core/__init__.py +0 -0
  82. wisent/core/contrastive_pairs/core/atoms.py +45 -0
  83. wisent/core/contrastive_pairs/core/buliders.py +59 -0
  84. wisent/core/contrastive_pairs/core/pair.py +178 -0
  85. wisent/core/contrastive_pairs/core/response.py +152 -0
  86. wisent/core/contrastive_pairs/core/serialization.py +300 -0
  87. wisent/core/contrastive_pairs/core/set.py +133 -0
  88. wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
  89. wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
  90. wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
  91. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
  92. wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
  93. wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
  94. wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
  95. wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
  96. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
  102. wisent/core/data_loaders/__init__.py +0 -0
  103. wisent/core/data_loaders/core/__init__.py +0 -0
  104. wisent/core/data_loaders/core/atoms.py +98 -0
  105. wisent/core/data_loaders/loaders/__init__.py +0 -0
  106. wisent/core/data_loaders/loaders/custom.py +120 -0
  107. wisent/core/data_loaders/loaders/lm_loader.py +218 -0
  108. wisent/core/detection_handling.py +257 -0
  109. wisent/core/download_full_benchmarks.py +1386 -0
  110. wisent/core/evaluators/__init__.py +0 -0
  111. wisent/core/evaluators/oracles/__init__.py +0 -0
  112. wisent/core/evaluators/oracles/interactive.py +73 -0
  113. wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
  114. wisent/core/evaluators/oracles/user_specified.py +67 -0
  115. wisent/core/hyperparameter_optimizer.py +429 -0
  116. wisent/core/lm_eval_harness_ground_truth.py +1396 -0
  117. wisent/core/log_likelihoods_evaluator.py +321 -0
  118. wisent/core/managed_cached_benchmarks.py +595 -0
  119. wisent/core/mixed_benchmark_sampler.py +364 -0
  120. wisent/core/model_config_manager.py +330 -0
  121. wisent/core/model_persistence.py +317 -0
  122. wisent/core/models/__init__.py +0 -0
  123. wisent/core/models/core/__init__.py +0 -0
  124. wisent/core/models/core/atoms.py +460 -0
  125. wisent/core/models/wisent_model.py +727 -0
  126. wisent/core/multi_steering.py +316 -0
  127. wisent/core/optuna/__init__.py +57 -0
  128. wisent/core/optuna/classifier/__init__.py +25 -0
  129. wisent/core/optuna/classifier/activation_generator.py +349 -0
  130. wisent/core/optuna/classifier/classifier_cache.py +509 -0
  131. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
  132. wisent/core/optuna/steering/__init__.py +0 -0
  133. wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
  134. wisent/core/optuna/steering/data_utils.py +342 -0
  135. wisent/core/optuna/steering/metrics.py +474 -0
  136. wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
  137. wisent/core/optuna/steering/steering_optimization.py +1111 -0
  138. wisent/core/parser.py +1668 -0
  139. wisent/core/prompts/__init__.py +0 -0
  140. wisent/core/prompts/core/__init__.py +0 -0
  141. wisent/core/prompts/core/atom.py +57 -0
  142. wisent/core/prompts/core/prompt_formater.py +157 -0
  143. wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
  144. wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
  145. wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
  146. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
  147. wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
  148. wisent/core/representation.py +5 -0
  149. wisent/core/sample_size_optimizer.py +648 -0
  150. wisent/core/sample_size_optimizer_v2.py +355 -0
  151. wisent/core/save_results.py +277 -0
  152. wisent/core/steering.py +652 -0
  153. wisent/core/steering_method.py +26 -0
  154. wisent/core/steering_methods/__init__.py +0 -0
  155. wisent/core/steering_methods/core/__init__.py +0 -0
  156. wisent/core/steering_methods/core/atoms.py +153 -0
  157. wisent/core/steering_methods/methods/__init__.py +0 -0
  158. wisent/core/steering_methods/methods/caa.py +44 -0
  159. wisent/core/steering_optimizer.py +1297 -0
  160. wisent/core/task_interface.py +132 -0
  161. wisent/core/task_selector.py +189 -0
  162. wisent/core/tasks/__init__.py +175 -0
  163. wisent/core/tasks/aime_task.py +141 -0
  164. wisent/core/tasks/file_task.py +211 -0
  165. wisent/core/tasks/hle_task.py +180 -0
  166. wisent/core/tasks/hmmt_task.py +119 -0
  167. wisent/core/tasks/livecodebench_task.py +201 -0
  168. wisent/core/tasks/livemathbench_task.py +158 -0
  169. wisent/core/tasks/lm_eval_task.py +455 -0
  170. wisent/core/tasks/math500_task.py +84 -0
  171. wisent/core/tasks/polymath_task.py +146 -0
  172. wisent/core/tasks/supergpqa_task.py +220 -0
  173. wisent/core/time_estimator.py +149 -0
  174. wisent/core/timing_calibration.py +174 -0
  175. wisent/core/tracking/__init__.py +54 -0
  176. wisent/core/tracking/latency.py +618 -0
  177. wisent/core/tracking/memory.py +359 -0
  178. wisent/core/trainers/__init__.py +0 -0
  179. wisent/core/trainers/core/__init__.py +11 -0
  180. wisent/core/trainers/core/atoms.py +45 -0
  181. wisent/core/trainers/steering_trainer.py +271 -0
  182. wisent/core/user_model_config.py +158 -0
  183. wisent/opti/__init__.py +0 -0
  184. wisent/opti/core/__init__.py +0 -0
  185. wisent/opti/core/atoms.py +175 -0
  186. wisent/opti/methods/__init__.py +0 -0
  187. wisent/opti/methods/opti_classificator.py +172 -0
  188. wisent/opti/methods/opti_steering.py +138 -0
  189. wisent/synthetic/__init__.py +0 -0
  190. wisent/synthetic/cleaners/__init__.py +0 -0
  191. wisent/synthetic/cleaners/core/__init__.py +0 -0
  192. wisent/synthetic/cleaners/core/atoms.py +58 -0
  193. wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
  194. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  195. wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
  196. wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
  197. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  198. wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
  199. wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
  200. wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
  201. wisent/synthetic/db_instructions/__init__.py +0 -0
  202. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  203. wisent/synthetic/db_instructions/core/atoms.py +25 -0
  204. wisent/synthetic/db_instructions/mini_dp.py +37 -0
  205. wisent/synthetic/generators/__init__.py +0 -0
  206. wisent/synthetic/generators/core/__init__.py +0 -0
  207. wisent/synthetic/generators/core/atoms.py +73 -0
  208. wisent/synthetic/generators/diversities/__init__.py +0 -0
  209. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  210. wisent/synthetic/generators/diversities/core/core.py +68 -0
  211. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  212. wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
  213. wisent/synthetic/generators/pairs_generator.py +179 -0
  214. wisent-0.5.1.dist-info/METADATA +67 -0
  215. wisent-0.5.1.dist-info/RECORD +218 -0
  216. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/WHEEL +1 -1
  217. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info/licenses}/LICENSE +2 -2
  218. wisent/activations/__init__.py +0 -9
  219. wisent/activations/client.py +0 -97
  220. wisent/activations/extractor.py +0 -251
  221. wisent/activations/models.py +0 -95
  222. wisent/client.py +0 -45
  223. wisent/control_vector/__init__.py +0 -9
  224. wisent/control_vector/client.py +0 -85
  225. wisent/control_vector/manager.py +0 -168
  226. wisent/control_vector/models.py +0 -70
  227. wisent/inference/__init__.py +0 -9
  228. wisent/inference/client.py +0 -103
  229. wisent/inference/inferencer.py +0 -250
  230. wisent/inference/models.py +0 -66
  231. wisent/utils/__init__.py +0 -3
  232. wisent/utils/auth.py +0 -30
  233. wisent/utils/http.py +0 -228
  234. wisent/version.py +0 -3
  235. wisent-0.1.1.dist-info/METADATA +0 -142
  236. wisent-0.1.1.dist-info/RECORD +0 -23
  237. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/top_level.txt +0 -0
wisent/core/agent/budget.py (new file)
@@ -0,0 +1,638 @@
+"""
+Budget and resource management for wisent-guard agent operations.
+
+This module provides utilities for managing time budgets, resource allocation,
+and optimizing task execution within specified constraints.
+"""
+
+from typing import Dict, List, Tuple, Optional, Any
+from dataclasses import dataclass
+from enum import Enum
+import time
+import math
+
+
+class ResourceType(Enum):
+    """Types of resources that can be budgeted."""
+    TIME = "time"
+    MEMORY = "memory"
+    COMPUTE = "compute"
+    TOKENS = "tokens"
+
+
+@dataclass
+class ResourceBudget:
+    """Represents a budget for a specific resource type."""
+    resource_type: ResourceType
+    total_budget: float
+    used_budget: float = 0.0
+    unit: str = ""
+
+    @property
+    def remaining_budget(self) -> float:
+        """Calculate remaining budget."""
+        return max(0.0, self.total_budget - self.used_budget)
+
+    @property
+    def usage_percentage(self) -> float:
+        """Calculate percentage of budget used."""
+        if self.total_budget <= 0:
+            return 0.0
+        return (self.used_budget / self.total_budget) * 100.0
+
+    def can_afford(self, cost: float) -> bool:
+        """Check if we can afford a given cost."""
+        return self.remaining_budget >= cost
+
+    def spend(self, amount: float) -> bool:
+        """Spend from the budget. Returns True if successful."""
+        if self.can_afford(amount):
+            self.used_budget += amount
+            return True
+        return False
+
+
+@dataclass
+class TaskEstimate:
+    """Estimates for a specific task."""
+    task_name: str
+    time_seconds: float
+    memory_mb: float = 0.0
+    compute_units: float = 0.0
+    tokens: int = 0
+
+    def scale(self, factor: float) -> 'TaskEstimate':
+        """Scale all estimates by a factor."""
+        return TaskEstimate(
+            task_name=self.task_name,
+            time_seconds=self.time_seconds * factor,
+            memory_mb=self.memory_mb * factor,
+            compute_units=self.compute_units * factor,
+            tokens=int(self.tokens * factor)
+        )
+
+
+class BudgetManager:
+    """Manages budgets and resource allocation for agent operations."""
+
+    def __init__(self):
+        self.budgets: Dict[ResourceType, ResourceBudget] = {}
+        self.task_estimates: Dict[str, TaskEstimate] = {}
+        self._default_estimates = self._get_default_task_estimates()
+
+    def set_time_budget(self, minutes: float) -> None:
+        """Set a time budget in minutes."""
+        self.budgets[ResourceType.TIME] = ResourceBudget(
+            resource_type=ResourceType.TIME,
+            total_budget=minutes * 60.0,  # Convert to seconds
+            unit="seconds"
+        )
+
+    def set_budget(self, resource_type: ResourceType, amount: float, unit: str = "") -> None:
+        """Set a budget for any resource type."""
+        self.budgets[resource_type] = ResourceBudget(
+            resource_type=resource_type,
+            total_budget=amount,
+            unit=unit
+        )
+
+    def get_budget(self, resource_type: ResourceType) -> Optional[ResourceBudget]:
+        """Get budget for a specific resource type."""
+        return self.budgets.get(resource_type)
+
+    def optimize_task_allocation(self,
+                                 task_candidates: List[str],
+                                 primary_resource: ResourceType = ResourceType.TIME,
+                                 max_tasks: Optional[int] = None) -> List[str]:
+        """
+        Optimize task allocation within budget constraints.
+
+        Args:
+            task_candidates: List of candidate task names
+            primary_resource: Primary resource to optimize for
+            max_tasks: Maximum number of tasks to select
+
+        Returns:
+            List of selected tasks that fit within budget
+        """
+        budget = self.budgets.get(primary_resource)
+        if not budget:
+            return task_candidates[:max_tasks] if max_tasks else task_candidates
+
+        # Calculate cost for each task
+        task_costs = []
+        for task in task_candidates:
+            cost = self._estimate_task_cost(task, primary_resource)
+            if cost > 0:
+                task_costs.append((task, cost))
+
+        # Sort by cost (ascending) to prioritize cheaper tasks
+        task_costs.sort(key=lambda x: x[1])
+
+        # Select tasks that fit within budget
+        selected_tasks = []
+        remaining_budget = budget.remaining_budget
+
+        for task, cost in task_costs:
+            if cost <= remaining_budget:
+                selected_tasks.append(task)
+                remaining_budget -= cost
+
+                if max_tasks and len(selected_tasks) >= max_tasks:
+                    break
+
+        return selected_tasks
+
+    def calculate_max_tasks_for_budget(self,
+                                       task_type: str = "default",
+                                       time_budget_minutes: float = 5.0) -> int:
+        """
+        Calculate maximum number of tasks that can fit within a time budget.
+
+        Args:
+            task_type: Type of task to estimate
+            time_budget_minutes: Time budget in minutes
+
+        Returns:
+            Maximum number of tasks
+        """
+        time_budget_seconds = time_budget_minutes * 60.0
+
+        # Get estimate for this task type
+        task_estimate = self._estimate_task_cost(task_type, ResourceType.TIME)
+
+        if task_estimate <= 0:
+            return 1  # Fallback to at least 1 task
+
+        max_tasks = max(1, int(time_budget_seconds / task_estimate))
+        return max_tasks
+
+    def estimate_completion_time(self, tasks: List[str]) -> float:
+        """
+        Estimate total completion time for a list of tasks.
+
+        Args:
+            tasks: List of task names
+
+        Returns:
+            Estimated time in seconds
+        """
+        total_time = 0.0
+        for task in tasks:
+            total_time += self._estimate_task_cost(task, ResourceType.TIME)
+        return total_time
+
+    def track_task_execution(self, task_name: str, start_time: float, end_time: float) -> None:
+        """
+        Track actual execution time for a task to improve future estimates.
+
+        Args:
+            task_name: Name of the task
+            start_time: Start timestamp
+            end_time: End timestamp
+        """
+        actual_time = end_time - start_time
+
+        # Update our estimates based on actual performance
+        if task_name in self.task_estimates:
+            # Use exponential moving average to update estimates
+            current_estimate = self.task_estimates[task_name].time_seconds
+            alpha = 0.3  # Learning rate
+            new_estimate = alpha * actual_time + (1 - alpha) * current_estimate
+            self.task_estimates[task_name].time_seconds = new_estimate
+        else:
+            # First time seeing this task
+            self.task_estimates[task_name] = TaskEstimate(
+                task_name=task_name,
+                time_seconds=actual_time
+            )
+
+    def get_budget_summary(self) -> Dict[str, Any]:
+        """Get a summary of all budgets and their usage."""
+        summary = {}
+        for resource_type, budget in self.budgets.items():
+            summary[resource_type.value] = {
+                "total": budget.total_budget,
+                "used": budget.used_budget,
+                "remaining": budget.remaining_budget,
+                "percentage_used": budget.usage_percentage,
+                "unit": budget.unit
+            }
+        return summary
+
+    def _estimate_task_cost(self, task_name: str, resource_type: ResourceType) -> float:
+        """Estimate the cost of a task for a specific resource type."""
+        # Check if we have a specific estimate for this task
+        if task_name in self.task_estimates:
+            estimate = self.task_estimates[task_name]
+            if resource_type == ResourceType.TIME:
+                return estimate.time_seconds
+            elif resource_type == ResourceType.MEMORY:
+                return estimate.memory_mb
+            elif resource_type == ResourceType.COMPUTE:
+                return estimate.compute_units
+            elif resource_type == ResourceType.TOKENS:
+                return float(estimate.tokens)
+
+        # Fall back to default estimates
+        return self._get_default_cost_estimate(task_name, resource_type)
+
+    def _get_default_cost_estimate(self, task_name: str, resource_type: ResourceType) -> float:
+        """Get default cost estimate for a task using device benchmarking."""
+        if resource_type == ResourceType.TIME:
+            # Use device-specific benchmarks for time estimates
+            try:
+                from .device_benchmarks import estimate_task_time
+
+                # Map task names to benchmark types
+                task_mapping = {
+                    "benchmark": "benchmark_eval",
+                    "eval": "benchmark_eval",
+                    "classifier": "classifier_training",
+                    "training": "classifier_training",
+                    "generation": "data_generation",
+                    "synthetic": "data_generation",
+                    "steering": "steering",
+                    "model_loading": "model_loading"
+                }
+
+                # Find the best matching task type
+                benchmark_type = None
+                for pattern, task_type in task_mapping.items():
+                    if pattern in task_name.lower():
+                        benchmark_type = task_type
+                        break
+
+                if benchmark_type:
+                    # Get quantity based on task type
+                    if benchmark_type in ["benchmark_eval", "classifier_training"]:
+                        quantity = 100  # Base unit for these tasks
+                    else:
+                        quantity = 1
+
+                    return estimate_task_time(benchmark_type, quantity)
+                else:
+                    # Use benchmark_eval as default
+                    return estimate_task_time("benchmark_eval", 100)
+
+            except Exception as e:
+                raise RuntimeError(f"Device benchmark estimate failed for task '{task_name}': {e}. Run device benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
+
+        elif resource_type == ResourceType.MEMORY:
+            raise RuntimeError(f"Memory estimation not implemented for task '{task_name}'")
+
+        elif resource_type == ResourceType.COMPUTE:
+            raise RuntimeError(f"Compute estimation not implemented for task '{task_name}'")
+
+        elif resource_type == ResourceType.TOKENS:
+            raise RuntimeError(f"Token estimation not implemented for task '{task_name}'")
+
+        raise RuntimeError(f"Unknown resource type: {resource_type}")
+
+    def _get_default_task_estimates(self) -> Dict[str, TaskEstimate]:
+        """Get default task estimates for common operations."""
+        # No default estimates - all estimates must come from device benchmarks
+        return {}
+
+
+# Global budget manager instance
+_budget_manager = BudgetManager()
+
+
+def get_budget_manager() -> BudgetManager:
+    """Get the global budget manager instance."""
+    return _budget_manager
+
+
+def set_time_budget(minutes: float) -> None:
+    """Convenience function to set time budget."""
+    _budget_manager.set_time_budget(minutes)
+
+
+def calculate_max_tasks_for_time_budget(task_type: str = "benchmark_evaluation",
+                                        time_budget_minutes: float = 5.0) -> int:
+    """
+    Calculate maximum number of tasks that can fit within a time budget.
+
+    Args:
+        task_type: Type of task to estimate (benchmark_evaluation, classifier_training, etc.)
+        time_budget_minutes: Time budget in minutes
+
+    Returns:
+        Maximum number of tasks
+    """
+    # Use device benchmarking for more accurate estimates
+    try:
+        from .device_benchmarks import estimate_task_time
+
+        # Map task types to benchmark types
+        benchmark_mapping = {
+            "benchmark_evaluation": "benchmark_eval",
+            "classifier_training": "classifier_training",
+            "data_generation": "data_generation",
+            "steering": "steering",
+            "model_loading": "model_loading"
+        }
+
+        benchmark_type = benchmark_mapping.get(task_type, "benchmark_eval")
+
+        # Get time per task
+        if benchmark_type in ["benchmark_eval", "classifier_training"]:
+            time_per_task = estimate_task_time(benchmark_type, 100) / 100  # Per unit
+        else:
+            time_per_task = estimate_task_time(benchmark_type, 1)
+
+        time_budget_seconds = time_budget_minutes * 60.0
+        max_tasks = max(1, int(time_budget_seconds / time_per_task))
+
+        return max_tasks
+
+    except Exception as e:
+        raise RuntimeError(f"Budget calculation failed for task '{task_type}': {e}. Run device benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
+
+
+def optimize_tasks_for_budget(task_candidates: List[str],
+                              time_budget_minutes: float = 5.0,
+                              max_tasks: Optional[int] = None) -> List[str]:
+    """
+    Optimize task selection within a time budget.
+
+    Args:
+        task_candidates: List of candidate task names
+        time_budget_minutes: Time budget in minutes
+        max_tasks: Maximum number of tasks to select
+
+    Returns:
+        List of selected tasks that fit within budget
+    """
+    _budget_manager.set_time_budget(time_budget_minutes)
+    return _budget_manager.optimize_task_allocation(
+        task_candidates,
+        ResourceType.TIME,
+        max_tasks
+    )
+
+
+def optimize_benchmarks_for_budget(task_candidates: List[str],
+                                   time_budget_minutes: float = 5.0,
+                                   max_tasks: Optional[int] = None,
+                                   prefer_fast: bool = False) -> List[str]:
+    """
+    Optimize benchmark selection within a time budget using priority and loading time data.
+
+    Args:
+        task_candidates: List of candidate benchmark names
+        time_budget_minutes: Time budget in minutes
+        max_tasks: Maximum number of tasks to select
+        prefer_fast: Whether to prefer fast benchmarks
+
+    Returns:
+        List of selected benchmarks that fit within budget
+    """
+    try:
+        # Import benchmark data
+        import sys
+        import os
+        sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lm-harness-integration'))
+        from only_benchmarks import BENCHMARKS
+
+        # Get benchmark information with loading times
+        benchmark_info = []
+        for task in task_candidates:
+            if task in BENCHMARKS:
+                config = BENCHMARKS[task]
+                loading_time = config.get('loading_time', 60.0)  # seconds
+                priority = config.get('priority', 'unknown')
+
+                # Calculate priority score for selection
+                priority_score = 0
+                if priority == 'high':
+                    priority_score = 3
+                elif priority == 'medium':
+                    priority_score = 2
+                elif priority == 'low':
+                    priority_score = 1
+
+                # Calculate efficiency score (priority per second)
+                efficiency_score = priority_score / max(loading_time, 1.0)
+
+                benchmark_info.append({
+                    'task': task,
+                    'loading_time': loading_time,
+                    'priority': priority,
+                    'priority_score': priority_score,
+                    'efficiency_score': efficiency_score
+                })
+            else:
+                # Fallback for unknown benchmarks
+                benchmark_info.append({
+                    'task': task,
+                    'loading_time': 60.0,
+                    'priority': 'unknown',
+                    'priority_score': 0,
+                    'efficiency_score': 0.0
+                })
+
+        # Sort by efficiency (prefer fast) or priority (prefer high priority)
+        if prefer_fast:
+            benchmark_info.sort(key=lambda x: x['efficiency_score'], reverse=True)
+        else:
+            benchmark_info.sort(key=lambda x: (x['priority_score'], -x['loading_time']), reverse=True)
+
+        # Select benchmarks that fit within budget
+        selected_benchmarks = []
+        total_time = 0.0
+        time_budget_seconds = time_budget_minutes * 60.0
+
+        for info in benchmark_info:
+            if total_time + info['loading_time'] <= time_budget_seconds:
+                selected_benchmarks.append(info['task'])
+                total_time += info['loading_time']
+
+                if max_tasks and len(selected_benchmarks) >= max_tasks:
+                    break
+
+        return selected_benchmarks
+
+    except Exception as e:
+        print(f"   āš ļø Priority-aware budget optimization failed: {e}")
+        print(f"   šŸ”„ Falling back to basic budget optimization...")
+        return optimize_tasks_for_budget(task_candidates, time_budget_minutes, max_tasks)
+
+
+def estimate_completion_time_minutes(tasks: List[str]) -> float:
+    """
+    Estimate total completion time for tasks in minutes.
+
+    Args:
+        tasks: List of task names
+
+    Returns:
+        Estimated time in minutes
+    """
+    seconds = _budget_manager.estimate_completion_time(tasks)
+    return seconds / 60.0
+
+
+def track_task_performance(task_name: str, start_time: float, end_time: float) -> None:
+    """
+    Track actual task performance to improve future estimates.
+
+    Args:
+        task_name: Name of the task
+        start_time: Start timestamp
+        end_time: End timestamp
+    """
+    _budget_manager.track_task_execution(task_name, start_time, end_time)
+
+
+def run_device_benchmark(force_rerun: bool = False) -> None:
+    """
+    Run device performance benchmark and save results.
+
+    Args:
+        force_rerun: Force re-run even if cached results exist
+    """
+    from .device_benchmarks import ensure_benchmark_exists
+
+    print("šŸš€ Running device performance benchmark...")
+    benchmark = ensure_benchmark_exists(force_rerun=force_rerun)
+
+    print("\nāœ… Benchmark Results:")
+    print("=" * 50)
+    print(f"Device ID: {benchmark.device_id[:12]}...")
+    print(f"Device Type: {benchmark.device_type}")
+    print(f"Model Loading: {benchmark.model_loading_seconds:.1f}s")
+    print(f"Evaluation: {benchmark.benchmark_eval_seconds_per_100_examples:.1f}s per 100 examples")
+    print(f"Classifier Training: {benchmark.classifier_training_seconds_per_100_samples:.1f}s per 100 samples")
+    print(f"Steering: {benchmark.steering_seconds_per_example:.1f}s per example")
+    print(f"Data Generation: {benchmark.data_generation_seconds_per_example:.1f}s per example")
+    print(f"\nResults saved to: device_benchmarks.json")
+
+    # Show some example estimates
+    print("\nšŸ“Š Example Time Estimates:")
+    print("-" * 30)
+    print(f"Loading model: {benchmark.model_loading_seconds:.1f}s")
+    print(f"100 eval examples: {benchmark.benchmark_eval_seconds_per_100_examples:.1f}s")
+    print(f"Training classifier (200 samples): {(benchmark.classifier_training_seconds_per_100_samples * 2):.1f}s")
+    print(f"10 steering examples: {(benchmark.steering_seconds_per_example * 10):.1f}s")
+
+
+def get_device_info() -> Dict[str, str]:
+    """Get current device information."""
+    from .device_benchmarks import get_current_device_info
+    return get_current_device_info()
+
+
+def estimate_task_time_direct(task_type: str, quantity: int = 1) -> float:
+    """
+    Direct estimate of task time using device benchmarks.
+
+    Args:
+        task_type: Type of task ("model_loading", "benchmark_eval", etc.)
+        quantity: Number of items
+
+    Returns:
+        Estimated time in seconds
+    """
+    from .device_benchmarks import estimate_task_time
+    return estimate_task_time(task_type, quantity)
+
+
+# CLI functionality for budget management
+def main():
+    """CLI entry point for budget management and benchmarking."""
+    import argparse
+    import sys
+
+    parser = argparse.ArgumentParser(
+        description="wisent-guard budget management and device benchmarking"
+    )
+
+    subparsers = parser.add_subparsers(dest='command', help='Available commands')
+
+    # Benchmark command
+    benchmark_parser = subparsers.add_parser('benchmark', help='Run device benchmark')
+    benchmark_parser.add_argument(
+        '--force', '-f',
+        action='store_true',
+        help='Force re-run benchmark even if cached results exist'
+    )
+
+    # Info command
+    info_parser = subparsers.add_parser('info', help='Show device information')
+
+    # Estimate command
+    estimate_parser = subparsers.add_parser('estimate', help='Estimate task time')
+    estimate_parser.add_argument('task_type', help='Type of task')
+    estimate_parser.add_argument('quantity', type=int, help='Number of items')
+
+    # Budget command
+    budget_parser = subparsers.add_parser('budget', help='Calculate budget allocations')
+    budget_parser.add_argument('--time-minutes', '-t', type=float, default=5.0, help='Time budget in minutes')
+    budget_parser.add_argument('--task-type', default='benchmark_evaluation', help='Task type to optimize for')
+
+    args = parser.parse_args()
+
+    if not args.command:
+        parser.print_help()
+        return 1
+
+    try:
+        if args.command == 'benchmark':
+            run_device_benchmark(force_rerun=args.force)
+
+        elif args.command == 'info':
+            print("šŸ–„ļø Current Device Information")
+            print("=" * 40)
+            device_info = get_device_info()
+            for key, value in device_info.items():
+                print(f"{key}: {value}")
+
        elif args.command == 'estimate':
+            estimated_seconds = estimate_task_time_direct(args.task_type, args.quantity)
+            print(f"ā±ļø Estimated time for {args.quantity}x {args.task_type}: {estimated_seconds:.1f} seconds ({estimated_seconds/60:.2f} minutes)")
+
+        elif args.command == 'budget':
+            max_tasks = calculate_max_tasks_for_time_budget(args.task_type, args.time_minutes)
+
+            # Map task types to benchmark types for direct estimation
+            benchmark_mapping = {
+                "benchmark_evaluation": "benchmark_eval",
+                "classifier_training": "classifier_training",
+                "data_generation": "data_generation",
+                "steering": "steering",
+                "model_loading": "model_loading"
+            }
+
+            benchmark_type = benchmark_mapping.get(args.task_type, "benchmark_eval")
+
+            # Get time per individual task unit
+            if benchmark_type in ["benchmark_eval", "classifier_training"]:
+                task_time = estimate_task_time_direct(benchmark_type, 100) / 100  # Per unit
+            else:
+                task_time = estimate_task_time_direct(benchmark_type, 1)
+
+            total_time = max_tasks * task_time
+
+            print(f"šŸ’° Budget Analysis:")
+            print(f"Time budget: {args.time_minutes:.1f} minutes ({args.time_minutes * 60:.0f} seconds)")
+            print(f"Task type: {args.task_type} (mapped to {benchmark_type})")
+            print(f"Time per task: {task_time:.2f} seconds")
+            print(f"Max tasks: {max_tasks}")
+            print(f"Total estimated time: {total_time:.1f} seconds ({total_time/60:.2f} minutes)")
+            print(f"Budget utilization: {(total_time / (args.time_minutes * 60)) * 100:.1f}%")

+    except KeyboardInterrupt:
+        print("\nāŒ Operation interrupted by user")
+        return 1
+    except Exception as e:
+        print(f"āŒ Error: {e}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    import sys
+    sys.exit(main())
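
The module above wraps a global BudgetManager in a few convenience functions (set_time_budget, optimize_tasks_for_budget, estimate_completion_time_minutes, track_task_performance). The following is a minimal usage sketch, not part of the package: it assumes the module imports as wisent.core.agent.budget (matching the file path above), that a device benchmark has already been generated (cost estimation raises RuntimeError otherwise, and the module's own error messages point to `python -m wisent_guard.core.agent.budget benchmark`), and the candidate task names are purely illustrative.

# Hypothetical usage sketch of the budget helpers shown in the diff above.
import time

from wisent.core.agent.budget import (
    estimate_completion_time_minutes,
    get_budget_manager,
    optimize_tasks_for_budget,
    track_task_performance,
)

# Illustrative task names; substrings like "eval" / "classifier" / "generation"
# are what _get_default_cost_estimate matches against.
candidates = ["benchmark_eval_mmlu", "classifier_training_run", "data_generation_batch"]

# Pick the cheapest tasks that fit a 5-minute time budget, at most two of them.
selected = optimize_tasks_for_budget(candidates, time_budget_minutes=5.0, max_tasks=2)
print("Selected tasks:", selected)
print("Estimated minutes:", estimate_completion_time_minutes(selected))

# Feed actual runtimes back so the EMA in track_task_execution refines future estimates.
for task in selected:
    start = time.time()
    ...  # run the task here
    track_task_performance(task, start, time.time())

# Inspect total/used/remaining budget and percentage used.
print(get_budget_manager().get_budget_summary())

The same module doubles as a small CLI (see main() above) with benchmark, info, estimate, and budget subcommands.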