wisent 0.1.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of wisent might be problematic.
Files changed (237)
  1. wisent/__init__.py +1 -8
  2. wisent/benchmarks/__init__.py +0 -0
  3. wisent/benchmarks/coding/__init__.py +0 -0
  4. wisent/benchmarks/coding/metrics/__init__.py +0 -0
  5. wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
  6. wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
  7. wisent/benchmarks/coding/metrics/evaluator.py +275 -0
  8. wisent/benchmarks/coding/metrics/passk.py +66 -0
  9. wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
  10. wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
  11. wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
  12. wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
  13. wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
  14. wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
  15. wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
  16. wisent/benchmarks/coding/providers/__init__.py +18 -0
  17. wisent/benchmarks/coding/providers/core/__init__.py +0 -0
  18. wisent/benchmarks/coding/providers/core/atoms.py +31 -0
  19. wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
  20. wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
  21. wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
  22. wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
  23. wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
  24. wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
  25. wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
  26. wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
  27. wisent/classifiers/__init__.py +0 -0
  28. wisent/classifiers/core/__init__.py +0 -0
  29. wisent/classifiers/core/atoms.py +747 -0
  30. wisent/classifiers/models/__init__.py +0 -0
  31. wisent/classifiers/models/logistic.py +29 -0
  32. wisent/classifiers/models/mlp.py +47 -0
  33. wisent/cli/__init__.py +0 -0
  34. wisent/cli/classifiers/__init__.py +0 -0
  35. wisent/cli/classifiers/classifier_rotator.py +137 -0
  36. wisent/cli/cli_logger.py +142 -0
  37. wisent/cli/data_loaders/__init__.py +0 -0
  38. wisent/cli/data_loaders/data_loader_rotator.py +96 -0
  39. wisent/cli/evaluators/__init__.py +0 -0
  40. wisent/cli/evaluators/evaluator_rotator.py +148 -0
  41. wisent/cli/steering_methods/__init__.py +0 -0
  42. wisent/cli/steering_methods/steering_rotator.py +110 -0
  43. wisent/cli/wisent_cli/__init__.py +0 -0
  44. wisent/cli/wisent_cli/commands/__init__.py +0 -0
  45. wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
  46. wisent/cli/wisent_cli/commands/listing.py +154 -0
  47. wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
  48. wisent/cli/wisent_cli/main.py +93 -0
  49. wisent/cli/wisent_cli/shell.py +80 -0
  50. wisent/cli/wisent_cli/ui.py +69 -0
  51. wisent/cli/wisent_cli/util/__init__.py +0 -0
  52. wisent/cli/wisent_cli/util/aggregations.py +43 -0
  53. wisent/cli/wisent_cli/util/parsing.py +126 -0
  54. wisent/cli/wisent_cli/version.py +4 -0
  55. wisent/core/__init__.py +27 -0
  56. wisent/core/activations/__init__.py +0 -0
  57. wisent/core/activations/activations_collector.py +338 -0
  58. wisent/core/activations/core/__init__.py +0 -0
  59. wisent/core/activations/core/atoms.py +216 -0
  60. wisent/core/agent/__init__.py +18 -0
  61. wisent/core/agent/budget.py +638 -0
  62. wisent/core/agent/device_benchmarks.py +685 -0
  63. wisent/core/agent/diagnose/__init__.py +55 -0
  64. wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
  65. wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
  66. wisent/core/agent/diagnose/create_classifier.py +1154 -0
  67. wisent/core/agent/diagnose/response_diagnostics.py +268 -0
  68. wisent/core/agent/diagnose/select_classifiers.py +506 -0
  69. wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
  70. wisent/core/agent/diagnose/tasks/__init__.py +33 -0
  71. wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
  72. wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
  73. wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
  74. wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
  75. wisent/core/agent/diagnose.py +242 -0
  76. wisent/core/agent/steer.py +212 -0
  77. wisent/core/agent/timeout.py +134 -0
  78. wisent/core/autonomous_agent.py +1234 -0
  79. wisent/core/bigcode_integration.py +583 -0
  80. wisent/core/contrastive_pairs/__init__.py +15 -0
  81. wisent/core/contrastive_pairs/core/__init__.py +0 -0
  82. wisent/core/contrastive_pairs/core/atoms.py +45 -0
  83. wisent/core/contrastive_pairs/core/buliders.py +59 -0
  84. wisent/core/contrastive_pairs/core/pair.py +178 -0
  85. wisent/core/contrastive_pairs/core/response.py +152 -0
  86. wisent/core/contrastive_pairs/core/serialization.py +300 -0
  87. wisent/core/contrastive_pairs/core/set.py +133 -0
  88. wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
  89. wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
  90. wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
  91. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
  92. wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
  93. wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
  94. wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
  95. wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
  96. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
  102. wisent/core/data_loaders/__init__.py +0 -0
  103. wisent/core/data_loaders/core/__init__.py +0 -0
  104. wisent/core/data_loaders/core/atoms.py +98 -0
  105. wisent/core/data_loaders/loaders/__init__.py +0 -0
  106. wisent/core/data_loaders/loaders/custom.py +120 -0
  107. wisent/core/data_loaders/loaders/lm_loader.py +218 -0
  108. wisent/core/detection_handling.py +257 -0
  109. wisent/core/download_full_benchmarks.py +1386 -0
  110. wisent/core/evaluators/__init__.py +0 -0
  111. wisent/core/evaluators/oracles/__init__.py +0 -0
  112. wisent/core/evaluators/oracles/interactive.py +73 -0
  113. wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
  114. wisent/core/evaluators/oracles/user_specified.py +67 -0
  115. wisent/core/hyperparameter_optimizer.py +429 -0
  116. wisent/core/lm_eval_harness_ground_truth.py +1396 -0
  117. wisent/core/log_likelihoods_evaluator.py +321 -0
  118. wisent/core/managed_cached_benchmarks.py +595 -0
  119. wisent/core/mixed_benchmark_sampler.py +364 -0
  120. wisent/core/model_config_manager.py +330 -0
  121. wisent/core/model_persistence.py +317 -0
  122. wisent/core/models/__init__.py +0 -0
  123. wisent/core/models/core/__init__.py +0 -0
  124. wisent/core/models/core/atoms.py +460 -0
  125. wisent/core/models/wisent_model.py +727 -0
  126. wisent/core/multi_steering.py +316 -0
  127. wisent/core/optuna/__init__.py +57 -0
  128. wisent/core/optuna/classifier/__init__.py +25 -0
  129. wisent/core/optuna/classifier/activation_generator.py +349 -0
  130. wisent/core/optuna/classifier/classifier_cache.py +509 -0
  131. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
  132. wisent/core/optuna/steering/__init__.py +0 -0
  133. wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
  134. wisent/core/optuna/steering/data_utils.py +342 -0
  135. wisent/core/optuna/steering/metrics.py +474 -0
  136. wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
  137. wisent/core/optuna/steering/steering_optimization.py +1111 -0
  138. wisent/core/parser.py +1668 -0
  139. wisent/core/prompts/__init__.py +0 -0
  140. wisent/core/prompts/core/__init__.py +0 -0
  141. wisent/core/prompts/core/atom.py +57 -0
  142. wisent/core/prompts/core/prompt_formater.py +157 -0
  143. wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
  144. wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
  145. wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
  146. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
  147. wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
  148. wisent/core/representation.py +5 -0
  149. wisent/core/sample_size_optimizer.py +648 -0
  150. wisent/core/sample_size_optimizer_v2.py +355 -0
  151. wisent/core/save_results.py +277 -0
  152. wisent/core/steering.py +652 -0
  153. wisent/core/steering_method.py +26 -0
  154. wisent/core/steering_methods/__init__.py +0 -0
  155. wisent/core/steering_methods/core/__init__.py +0 -0
  156. wisent/core/steering_methods/core/atoms.py +153 -0
  157. wisent/core/steering_methods/methods/__init__.py +0 -0
  158. wisent/core/steering_methods/methods/caa.py +44 -0
  159. wisent/core/steering_optimizer.py +1297 -0
  160. wisent/core/task_interface.py +132 -0
  161. wisent/core/task_selector.py +189 -0
  162. wisent/core/tasks/__init__.py +175 -0
  163. wisent/core/tasks/aime_task.py +141 -0
  164. wisent/core/tasks/file_task.py +211 -0
  165. wisent/core/tasks/hle_task.py +180 -0
  166. wisent/core/tasks/hmmt_task.py +119 -0
  167. wisent/core/tasks/livecodebench_task.py +201 -0
  168. wisent/core/tasks/livemathbench_task.py +158 -0
  169. wisent/core/tasks/lm_eval_task.py +455 -0
  170. wisent/core/tasks/math500_task.py +84 -0
  171. wisent/core/tasks/polymath_task.py +146 -0
  172. wisent/core/tasks/supergpqa_task.py +220 -0
  173. wisent/core/time_estimator.py +149 -0
  174. wisent/core/timing_calibration.py +174 -0
  175. wisent/core/tracking/__init__.py +54 -0
  176. wisent/core/tracking/latency.py +618 -0
  177. wisent/core/tracking/memory.py +359 -0
  178. wisent/core/trainers/__init__.py +0 -0
  179. wisent/core/trainers/core/__init__.py +11 -0
  180. wisent/core/trainers/core/atoms.py +45 -0
  181. wisent/core/trainers/steering_trainer.py +271 -0
  182. wisent/core/user_model_config.py +158 -0
  183. wisent/opti/__init__.py +0 -0
  184. wisent/opti/core/__init__.py +0 -0
  185. wisent/opti/core/atoms.py +175 -0
  186. wisent/opti/methods/__init__.py +0 -0
  187. wisent/opti/methods/opti_classificator.py +172 -0
  188. wisent/opti/methods/opti_steering.py +138 -0
  189. wisent/synthetic/__init__.py +0 -0
  190. wisent/synthetic/cleaners/__init__.py +0 -0
  191. wisent/synthetic/cleaners/core/__init__.py +0 -0
  192. wisent/synthetic/cleaners/core/atoms.py +58 -0
  193. wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
  194. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  195. wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
  196. wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
  197. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  198. wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
  199. wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
  200. wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
  201. wisent/synthetic/db_instructions/__init__.py +0 -0
  202. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  203. wisent/synthetic/db_instructions/core/atoms.py +25 -0
  204. wisent/synthetic/db_instructions/mini_dp.py +37 -0
  205. wisent/synthetic/generators/__init__.py +0 -0
  206. wisent/synthetic/generators/core/__init__.py +0 -0
  207. wisent/synthetic/generators/core/atoms.py +73 -0
  208. wisent/synthetic/generators/diversities/__init__.py +0 -0
  209. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  210. wisent/synthetic/generators/diversities/core/core.py +68 -0
  211. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  212. wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
  213. wisent/synthetic/generators/pairs_generator.py +179 -0
  214. wisent-0.5.1.dist-info/METADATA +67 -0
  215. wisent-0.5.1.dist-info/RECORD +218 -0
  216. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/WHEEL +1 -1
  217. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info/licenses}/LICENSE +2 -2
  218. wisent/activations/__init__.py +0 -9
  219. wisent/activations/client.py +0 -97
  220. wisent/activations/extractor.py +0 -251
  221. wisent/activations/models.py +0 -95
  222. wisent/client.py +0 -45
  223. wisent/control_vector/__init__.py +0 -9
  224. wisent/control_vector/client.py +0 -85
  225. wisent/control_vector/manager.py +0 -168
  226. wisent/control_vector/models.py +0 -70
  227. wisent/inference/__init__.py +0 -9
  228. wisent/inference/client.py +0 -103
  229. wisent/inference/inferencer.py +0 -250
  230. wisent/inference/models.py +0 -66
  231. wisent/utils/__init__.py +0 -3
  232. wisent/utils/auth.py +0 -30
  233. wisent/utils/http.py +0 -228
  234. wisent/version.py +0 -3
  235. wisent-0.1.1.dist-info/METADATA +0 -142
  236. wisent-0.1.1.dist-info/RECORD +0 -23
  237. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/top_level.txt +0 -0
wisent/core/agent/budget.py (new file)
@@ -0,0 +1,638 @@
+"""
+Budget and resource management for wisent-guard agent operations.
+
+This module provides utilities for managing time budgets, resource allocation,
+and optimizing task execution within specified constraints.
+"""
+
+from typing import Dict, List, Tuple, Optional, Any
+from dataclasses import dataclass
+from enum import Enum
+import time
+import math
+
+
+class ResourceType(Enum):
+    """Types of resources that can be budgeted."""
+    TIME = "time"
+    MEMORY = "memory"
+    COMPUTE = "compute"
+    TOKENS = "tokens"
+
+
+@dataclass
+class ResourceBudget:
+    """Represents a budget for a specific resource type."""
+    resource_type: ResourceType
+    total_budget: float
+    used_budget: float = 0.0
+    unit: str = ""
+
+    @property
+    def remaining_budget(self) -> float:
+        """Calculate remaining budget."""
+        return max(0.0, self.total_budget - self.used_budget)
+
+    @property
+    def usage_percentage(self) -> float:
+        """Calculate percentage of budget used."""
+        if self.total_budget <= 0:
+            return 0.0
+        return (self.used_budget / self.total_budget) * 100.0
+
+    def can_afford(self, cost: float) -> bool:
+        """Check if we can afford a given cost."""
+        return self.remaining_budget >= cost
+
+    def spend(self, amount: float) -> bool:
+        """Spend from the budget. Returns True if successful."""
+        if self.can_afford(amount):
+            self.used_budget += amount
+            return True
+        return False
+
+
+@dataclass
+class TaskEstimate:
+    """Estimates for a specific task."""
+    task_name: str
+    time_seconds: float
+    memory_mb: float = 0.0
+    compute_units: float = 0.0
+    tokens: int = 0
+
+    def scale(self, factor: float) -> 'TaskEstimate':
+        """Scale all estimates by a factor."""
+        return TaskEstimate(
+            task_name=self.task_name,
+            time_seconds=self.time_seconds * factor,
+            memory_mb=self.memory_mb * factor,
+            compute_units=self.compute_units * factor,
+            tokens=int(self.tokens * factor)
+        )
+
+
+class BudgetManager:
+    """Manages budgets and resource allocation for agent operations."""
+
+    def __init__(self):
+        self.budgets: Dict[ResourceType, ResourceBudget] = {}
+        self.task_estimates: Dict[str, TaskEstimate] = {}
+        self._default_estimates = self._get_default_task_estimates()
+
+    def set_time_budget(self, minutes: float) -> None:
+        """Set a time budget in minutes."""
+        self.budgets[ResourceType.TIME] = ResourceBudget(
+            resource_type=ResourceType.TIME,
+            total_budget=minutes * 60.0,  # Convert to seconds
+            unit="seconds"
+        )
+
+    def set_budget(self, resource_type: ResourceType, amount: float, unit: str = "") -> None:
+        """Set a budget for any resource type."""
+        self.budgets[resource_type] = ResourceBudget(
+            resource_type=resource_type,
+            total_budget=amount,
+            unit=unit
+        )
+
+    def get_budget(self, resource_type: ResourceType) -> Optional[ResourceBudget]:
+        """Get budget for a specific resource type."""
+        return self.budgets.get(resource_type)
+
+    def optimize_task_allocation(self,
+                                 task_candidates: List[str],
+                                 primary_resource: ResourceType = ResourceType.TIME,
+                                 max_tasks: Optional[int] = None) -> List[str]:
+        """
+        Optimize task allocation within budget constraints.
+
+        Args:
+            task_candidates: List of candidate task names
+            primary_resource: Primary resource to optimize for
+            max_tasks: Maximum number of tasks to select
+
+        Returns:
+            List of selected tasks that fit within budget
+        """
+        budget = self.budgets.get(primary_resource)
+        if not budget:
+            return task_candidates[:max_tasks] if max_tasks else task_candidates
+
+        # Calculate cost for each task
+        task_costs = []
+        for task in task_candidates:
+            cost = self._estimate_task_cost(task, primary_resource)
+            if cost > 0:
+                task_costs.append((task, cost))
+
+        # Sort by cost (ascending) to prioritize cheaper tasks
+        task_costs.sort(key=lambda x: x[1])
+
+        # Select tasks that fit within budget
+        selected_tasks = []
+        remaining_budget = budget.remaining_budget
+
+        for task, cost in task_costs:
+            if cost <= remaining_budget:
+                selected_tasks.append(task)
+                remaining_budget -= cost
+
+                if max_tasks and len(selected_tasks) >= max_tasks:
+                    break
+
+        return selected_tasks
+
+    def calculate_max_tasks_for_budget(self,
+                                       task_type: str = "default",
+                                       time_budget_minutes: float = 5.0) -> int:
+        """
+        Calculate maximum number of tasks that can fit within a time budget.
+
+        Args:
+            task_type: Type of task to estimate
+            time_budget_minutes: Time budget in minutes
+
+        Returns:
+            Maximum number of tasks
+        """
+        time_budget_seconds = time_budget_minutes * 60.0
+
+        # Get estimate for this task type
+        task_estimate = self._estimate_task_cost(task_type, ResourceType.TIME)
+
+        if task_estimate <= 0:
+            return 1  # Fallback to at least 1 task
+
+        max_tasks = max(1, int(time_budget_seconds / task_estimate))
+        return max_tasks
+
+    def estimate_completion_time(self, tasks: List[str]) -> float:
+        """
+        Estimate total completion time for a list of tasks.
+
+        Args:
+            tasks: List of task names
+
+        Returns:
+            Estimated time in seconds
+        """
+        total_time = 0.0
+        for task in tasks:
+            total_time += self._estimate_task_cost(task, ResourceType.TIME)
+        return total_time
+
+    def track_task_execution(self, task_name: str, start_time: float, end_time: float) -> None:
+        """
+        Track actual execution time for a task to improve future estimates.
+
+        Args:
+            task_name: Name of the task
+            start_time: Start timestamp
+            end_time: End timestamp
+        """
+        actual_time = end_time - start_time
+
+        # Update our estimates based on actual performance
+        if task_name in self.task_estimates:
+            # Use exponential moving average to update estimates
+            current_estimate = self.task_estimates[task_name].time_seconds
+            alpha = 0.3  # Learning rate
+            new_estimate = alpha * actual_time + (1 - alpha) * current_estimate
+            self.task_estimates[task_name].time_seconds = new_estimate
+        else:
+            # First time seeing this task
+            self.task_estimates[task_name] = TaskEstimate(
+                task_name=task_name,
+                time_seconds=actual_time
+            )
+
+    def get_budget_summary(self) -> Dict[str, Any]:
+        """Get a summary of all budgets and their usage."""
+        summary = {}
+        for resource_type, budget in self.budgets.items():
+            summary[resource_type.value] = {
+                "total": budget.total_budget,
+                "used": budget.used_budget,
+                "remaining": budget.remaining_budget,
+                "percentage_used": budget.usage_percentage,
+                "unit": budget.unit
+            }
+        return summary
+
+    def _estimate_task_cost(self, task_name: str, resource_type: ResourceType) -> float:
+        """Estimate the cost of a task for a specific resource type."""
+        # Check if we have a specific estimate for this task
+        if task_name in self.task_estimates:
+            estimate = self.task_estimates[task_name]
+            if resource_type == ResourceType.TIME:
+                return estimate.time_seconds
+            elif resource_type == ResourceType.MEMORY:
+                return estimate.memory_mb
+            elif resource_type == ResourceType.COMPUTE:
+                return estimate.compute_units
+            elif resource_type == ResourceType.TOKENS:
+                return float(estimate.tokens)
+
+        # Fall back to default estimates
+        return self._get_default_cost_estimate(task_name, resource_type)
+
+    def _get_default_cost_estimate(self, task_name: str, resource_type: ResourceType) -> float:
+        """Get default cost estimate for a task using device benchmarking."""
+        if resource_type == ResourceType.TIME:
+            # Use device-specific benchmarks for time estimates
+            try:
+                from .device_benchmarks import estimate_task_time
+
+                # Map task names to benchmark types
+                task_mapping = {
+                    "benchmark": "benchmark_eval",
+                    "eval": "benchmark_eval",
+                    "classifier": "classifier_training",
+                    "training": "classifier_training",
+                    "generation": "data_generation",
+                    "synthetic": "data_generation",
+                    "steering": "steering",
+                    "model_loading": "model_loading"
+                }
+
+                # Find the best matching task type
+                benchmark_type = None
+                for pattern, task_type in task_mapping.items():
+                    if pattern in task_name.lower():
+                        benchmark_type = task_type
+                        break
+
+                if benchmark_type:
+                    # Get quantity based on task type
+                    if benchmark_type in ["benchmark_eval", "classifier_training"]:
+                        quantity = 100  # Base unit for these tasks
+                    else:
+                        quantity = 1
+
+                    return estimate_task_time(benchmark_type, quantity)
+                else:
+                    # Use benchmark_eval as default
+                    return estimate_task_time("benchmark_eval", 100)
+
+            except Exception as e:
+                raise RuntimeError(f"Device benchmark estimate failed for task '{task_name}': {e}. Run device benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
+
+        elif resource_type == ResourceType.MEMORY:
+            raise RuntimeError(f"Memory estimation not implemented for task '{task_name}'")
+
+        elif resource_type == ResourceType.COMPUTE:
+            raise RuntimeError(f"Compute estimation not implemented for task '{task_name}'")
+
+        elif resource_type == ResourceType.TOKENS:
+            raise RuntimeError(f"Token estimation not implemented for task '{task_name}'")
+
+        raise RuntimeError(f"Unknown resource type: {resource_type}")
+
+    def _get_default_task_estimates(self) -> Dict[str, TaskEstimate]:
+        """Get default task estimates for common operations."""
+        # No default estimates - all estimates must come from device benchmarks
+        return {}
+
+
+# Global budget manager instance
+_budget_manager = BudgetManager()
+
+
+def get_budget_manager() -> BudgetManager:
+    """Get the global budget manager instance."""
+    return _budget_manager
+
+
+def set_time_budget(minutes: float) -> None:
+    """Convenience function to set time budget."""
+    _budget_manager.set_time_budget(minutes)
+
+
+def calculate_max_tasks_for_time_budget(task_type: str = "benchmark_evaluation",
+                                        time_budget_minutes: float = 5.0) -> int:
+    """
+    Calculate maximum number of tasks that can fit within a time budget.
+
+    Args:
+        task_type: Type of task to estimate (benchmark_evaluation, classifier_training, etc.)
+        time_budget_minutes: Time budget in minutes
+
+    Returns:
+        Maximum number of tasks
+    """
+    # Use device benchmarking for more accurate estimates
+    try:
+        from .device_benchmarks import estimate_task_time
+
+        # Map task types to benchmark types
+        benchmark_mapping = {
+            "benchmark_evaluation": "benchmark_eval",
+            "classifier_training": "classifier_training",
+            "data_generation": "data_generation",
+            "steering": "steering",
+            "model_loading": "model_loading"
+        }
+
+        benchmark_type = benchmark_mapping.get(task_type, "benchmark_eval")
+
+        # Get time per task
+        if benchmark_type in ["benchmark_eval", "classifier_training"]:
+            time_per_task = estimate_task_time(benchmark_type, 100) / 100  # Per unit
+        else:
+            time_per_task = estimate_task_time(benchmark_type, 1)
+
+        time_budget_seconds = time_budget_minutes * 60.0
+        max_tasks = max(1, int(time_budget_seconds / time_per_task))
+
+        return max_tasks
+
+    except Exception as e:
+        raise RuntimeError(f"Budget calculation failed for task '{task_type}': {e}. Run device benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
+
+
+def optimize_tasks_for_budget(task_candidates: List[str],
+                              time_budget_minutes: float = 5.0,
+                              max_tasks: Optional[int] = None) -> List[str]:
+    """
+    Optimize task selection within a time budget.
+
+    Args:
+        task_candidates: List of candidate task names
+        time_budget_minutes: Time budget in minutes
+        max_tasks: Maximum number of tasks to select
+
+    Returns:
+        List of selected tasks that fit within budget
+    """
+    _budget_manager.set_time_budget(time_budget_minutes)
+    return _budget_manager.optimize_task_allocation(
+        task_candidates,
+        ResourceType.TIME,
+        max_tasks
+    )
+
+
+def optimize_benchmarks_for_budget(task_candidates: List[str],
+                                   time_budget_minutes: float = 5.0,
+                                   max_tasks: Optional[int] = None,
+                                   prefer_fast: bool = False) -> List[str]:
+    """
+    Optimize benchmark selection within a time budget using priority and loading time data.
+
+    Args:
+        task_candidates: List of candidate benchmark names
+        time_budget_minutes: Time budget in minutes
+        max_tasks: Maximum number of tasks to select
+        prefer_fast: Whether to prefer fast benchmarks
+
+    Returns:
+        List of selected benchmarks that fit within budget
+    """
+    try:
+        # Import benchmark data
+        import sys
+        import os
+        sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lm-harness-integration'))
+        from only_benchmarks import BENCHMARKS
+
+        # Get benchmark information with loading times
+        benchmark_info = []
+        for task in task_candidates:
+            if task in BENCHMARKS:
+                config = BENCHMARKS[task]
+                loading_time = config.get('loading_time', 60.0)  # seconds
+                priority = config.get('priority', 'unknown')
+
+                # Calculate priority score for selection
+                priority_score = 0
+                if priority == 'high':
+                    priority_score = 3
+                elif priority == 'medium':
+                    priority_score = 2
+                elif priority == 'low':
+                    priority_score = 1
+
+                # Calculate efficiency score (priority per second)
+                efficiency_score = priority_score / max(loading_time, 1.0)
+
+                benchmark_info.append({
+                    'task': task,
+                    'loading_time': loading_time,
+                    'priority': priority,
+                    'priority_score': priority_score,
+                    'efficiency_score': efficiency_score
+                })
+            else:
+                # Fallback for unknown benchmarks
+                benchmark_info.append({
+                    'task': task,
+                    'loading_time': 60.0,
+                    'priority': 'unknown',
+                    'priority_score': 0,
+                    'efficiency_score': 0.0
+                })
+
+        # Sort by efficiency (prefer fast) or priority (prefer high priority)
+        if prefer_fast:
+            benchmark_info.sort(key=lambda x: x['efficiency_score'], reverse=True)
+        else:
+            benchmark_info.sort(key=lambda x: (x['priority_score'], -x['loading_time']), reverse=True)
+
+        # Select benchmarks that fit within budget
+        selected_benchmarks = []
+        total_time = 0.0
+        time_budget_seconds = time_budget_minutes * 60.0
+
+        for info in benchmark_info:
+            if total_time + info['loading_time'] <= time_budget_seconds:
+                selected_benchmarks.append(info['task'])
+                total_time += info['loading_time']
+
+                if max_tasks and len(selected_benchmarks) >= max_tasks:
+                    break
+
+        return selected_benchmarks
+
+    except Exception as e:
+        print(f"   āš ļø Priority-aware budget optimization failed: {e}")
+        print(f"   šŸ”„ Falling back to basic budget optimization...")
+        return optimize_tasks_for_budget(task_candidates, time_budget_minutes, max_tasks)
+
+
+def estimate_completion_time_minutes(tasks: List[str]) -> float:
+    """
+    Estimate total completion time for tasks in minutes.
+
+    Args:
+        tasks: List of task names
+
+    Returns:
+        Estimated time in minutes
+    """
+    seconds = _budget_manager.estimate_completion_time(tasks)
+    return seconds / 60.0
+
+
+def track_task_performance(task_name: str, start_time: float, end_time: float) -> None:
+    """
+    Track actual task performance to improve future estimates.
+
+    Args:
+        task_name: Name of the task
+        start_time: Start timestamp
+        end_time: End timestamp
+    """
+    _budget_manager.track_task_execution(task_name, start_time, end_time)
+
+
+def run_device_benchmark(force_rerun: bool = False) -> None:
+    """
+    Run device performance benchmark and save results.
+
+    Args:
+        force_rerun: Force re-run even if cached results exist
+    """
+    from .device_benchmarks import ensure_benchmark_exists
+
+    print("šŸš€ Running device performance benchmark...")
+    benchmark = ensure_benchmark_exists(force_rerun=force_rerun)
+
+    print("\nāœ… Benchmark Results:")
+    print("=" * 50)
+    print(f"Device ID: {benchmark.device_id[:12]}...")
+    print(f"Device Type: {benchmark.device_type}")
+    print(f"Model Loading: {benchmark.model_loading_seconds:.1f}s")
+    print(f"Evaluation: {benchmark.benchmark_eval_seconds_per_100_examples:.1f}s per 100 examples")
+    print(f"Classifier Training: {benchmark.classifier_training_seconds_per_100_samples:.1f}s per 100 samples")
+    print(f"Steering: {benchmark.steering_seconds_per_example:.1f}s per example")
+    print(f"Data Generation: {benchmark.data_generation_seconds_per_example:.1f}s per example")
+    print(f"\nResults saved to: device_benchmarks.json")
+
+    # Show some example estimates
+    print("\nšŸ“Š Example Time Estimates:")
+    print("-" * 30)
+    print(f"Loading model: {benchmark.model_loading_seconds:.1f}s")
+    print(f"100 eval examples: {benchmark.benchmark_eval_seconds_per_100_examples:.1f}s")
+    print(f"Training classifier (200 samples): {(benchmark.classifier_training_seconds_per_100_samples * 2):.1f}s")
+    print(f"10 steering examples: {(benchmark.steering_seconds_per_example * 10):.1f}s")
+
+
+def get_device_info() -> Dict[str, str]:
+    """Get current device information."""
+    from .device_benchmarks import get_current_device_info
+    return get_current_device_info()
+
+
+def estimate_task_time_direct(task_type: str, quantity: int = 1) -> float:
+    """
+    Direct estimate of task time using device benchmarks.
+
+    Args:
+        task_type: Type of task ("model_loading", "benchmark_eval", etc.)
+        quantity: Number of items
+
+    Returns:
+        Estimated time in seconds
+    """
+    from .device_benchmarks import estimate_task_time
+    return estimate_task_time(task_type, quantity)
+
+
+# CLI functionality for budget management
+def main():
+    """CLI entry point for budget management and benchmarking."""
+    import argparse
+    import sys
+
+    parser = argparse.ArgumentParser(
+        description="wisent-guard budget management and device benchmarking"
+    )
+
+    subparsers = parser.add_subparsers(dest='command', help='Available commands')
+
+    # Benchmark command
+    benchmark_parser = subparsers.add_parser('benchmark', help='Run device benchmark')
+    benchmark_parser.add_argument(
+        '--force', '-f',
+        action='store_true',
+        help='Force re-run benchmark even if cached results exist'
+    )
+
+    # Info command
+    info_parser = subparsers.add_parser('info', help='Show device information')
+
+    # Estimate command
+    estimate_parser = subparsers.add_parser('estimate', help='Estimate task time')
+    estimate_parser.add_argument('task_type', help='Type of task')
+    estimate_parser.add_argument('quantity', type=int, help='Number of items')
+
+    # Budget command
+    budget_parser = subparsers.add_parser('budget', help='Calculate budget allocations')
+    budget_parser.add_argument('--time-minutes', '-t', type=float, default=5.0, help='Time budget in minutes')
+    budget_parser.add_argument('--task-type', default='benchmark_evaluation', help='Task type to optimize for')
+
+    args = parser.parse_args()
+
+    if not args.command:
+        parser.print_help()
+        return 1
+
+    try:
+        if args.command == 'benchmark':
+            run_device_benchmark(force_rerun=args.force)
+
+        elif args.command == 'info':
+            print("šŸ–„ļø Current Device Information")
+            print("=" * 40)
+            device_info = get_device_info()
+            for key, value in device_info.items():
+                print(f"{key}: {value}")
+
        elif args.command == 'estimate':
+            estimated_seconds = estimate_task_time_direct(args.task_type, args.quantity)
+            print(f"ā±ļø Estimated time for {args.quantity}x {args.task_type}: {estimated_seconds:.1f} seconds ({estimated_seconds/60:.2f} minutes)")
+
+        elif args.command == 'budget':
+            max_tasks = calculate_max_tasks_for_time_budget(args.task_type, args.time_minutes)
+
+            # Map task types to benchmark types for direct estimation
+            benchmark_mapping = {
+                "benchmark_evaluation": "benchmark_eval",
+                "classifier_training": "classifier_training",
+                "data_generation": "data_generation",
+                "steering": "steering",
+                "model_loading": "model_loading"
+            }
+
+            benchmark_type = benchmark_mapping.get(args.task_type, "benchmark_eval")
+
+            # Get time per individual task unit
+            if benchmark_type in ["benchmark_eval", "classifier_training"]:
+                task_time = estimate_task_time_direct(benchmark_type, 100) / 100  # Per unit
+            else:
+                task_time = estimate_task_time_direct(benchmark_type, 1)
+
+            total_time = max_tasks * task_time
+
+            print(f"šŸ’° Budget Analysis:")
+            print(f"Time budget: {args.time_minutes:.1f} minutes ({args.time_minutes * 60:.0f} seconds)")
+            print(f"Task type: {args.task_type} (mapped to {benchmark_type})")
+            print(f"Time per task: {task_time:.2f} seconds")
+            print(f"Max tasks: {max_tasks}")
+            print(f"Total estimated time: {total_time:.1f} seconds ({total_time/60:.2f} minutes)")
+            print(f"Budget utilization: {(total_time / (args.time_minutes * 60)) * 100:.1f}%")

+    except KeyboardInterrupt:
+        print("\nāŒ Operation interrupted by user")
+        return 1
+    except Exception as e:
+        print(f"āŒ Error: {e}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    import sys
+    sys.exit(main())
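
The module above wraps a global BudgetManager in a few convenience functions (set_time_budget, optimize_tasks_for_budget, estimate_completion_time_minutes, track_task_performance). The following is a minimal usage sketch, not part of the package: it assumes the module imports as wisent.core.agent.budget (matching the file path above), that a device benchmark has already been generated (cost estimation raises RuntimeError otherwise, and the module's own error messages point to `python -m wisent_guard.core.agent.budget benchmark`), and the candidate task names are purely illustrative.

# Hypothetical usage sketch of the budget helpers shown in the diff above.
import time

from wisent.core.agent.budget import (
    estimate_completion_time_minutes,
    get_budget_manager,
    optimize_tasks_for_budget,
    track_task_performance,
)

# Illustrative task names; substrings like "eval" / "classifier" / "generation"
# are what _get_default_cost_estimate matches against.
candidates = ["benchmark_eval_mmlu", "classifier_training_run", "data_generation_batch"]

# Pick the cheapest tasks that fit a 5-minute time budget, at most two of them.
selected = optimize_tasks_for_budget(candidates, time_budget_minutes=5.0, max_tasks=2)
print("Selected tasks:", selected)
print("Estimated minutes:", estimate_completion_time_minutes(selected))

# Feed actual runtimes back so the EMA in track_task_execution refines future estimates.
for task in selected:
    start = time.time()
    ...  # run the task here
    track_task_performance(task, start, time.time())

# Inspect total/used/remaining budget and percentage used.
print(get_budget_manager().get_budget_summary())

The same module doubles as a small CLI (see main() above) with benchmark, info, estimate, and budget subcommands.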