wisent 0.1.1__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wisent might be problematic. Click here for more details.

Files changed (237)
  1. wisent/__init__.py +1 -8
  2. wisent/benchmarks/__init__.py +0 -0
  3. wisent/benchmarks/coding/__init__.py +0 -0
  4. wisent/benchmarks/coding/metrics/__init__.py +0 -0
  5. wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
  6. wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
  7. wisent/benchmarks/coding/metrics/evaluator.py +275 -0
  8. wisent/benchmarks/coding/metrics/passk.py +66 -0
  9. wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
  10. wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
  11. wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
  12. wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
  13. wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
  14. wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
  15. wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
  16. wisent/benchmarks/coding/providers/__init__.py +18 -0
  17. wisent/benchmarks/coding/providers/core/__init__.py +0 -0
  18. wisent/benchmarks/coding/providers/core/atoms.py +31 -0
  19. wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
  20. wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
  21. wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
  22. wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
  23. wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
  24. wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
  25. wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
  26. wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
  27. wisent/classifiers/__init__.py +0 -0
  28. wisent/classifiers/core/__init__.py +0 -0
  29. wisent/classifiers/core/atoms.py +747 -0
  30. wisent/classifiers/models/__init__.py +0 -0
  31. wisent/classifiers/models/logistic.py +29 -0
  32. wisent/classifiers/models/mlp.py +47 -0
  33. wisent/cli/__init__.py +0 -0
  34. wisent/cli/classifiers/__init__.py +0 -0
  35. wisent/cli/classifiers/classifier_rotator.py +137 -0
  36. wisent/cli/cli_logger.py +142 -0
  37. wisent/cli/data_loaders/__init__.py +0 -0
  38. wisent/cli/data_loaders/data_loader_rotator.py +96 -0
  39. wisent/cli/evaluators/__init__.py +0 -0
  40. wisent/cli/evaluators/evaluator_rotator.py +148 -0
  41. wisent/cli/steering_methods/__init__.py +0 -0
  42. wisent/cli/steering_methods/steering_rotator.py +110 -0
  43. wisent/cli/wisent_cli/__init__.py +0 -0
  44. wisent/cli/wisent_cli/commands/__init__.py +0 -0
  45. wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
  46. wisent/cli/wisent_cli/commands/listing.py +154 -0
  47. wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
  48. wisent/cli/wisent_cli/main.py +93 -0
  49. wisent/cli/wisent_cli/shell.py +80 -0
  50. wisent/cli/wisent_cli/ui.py +69 -0
  51. wisent/cli/wisent_cli/util/__init__.py +0 -0
  52. wisent/cli/wisent_cli/util/aggregations.py +43 -0
  53. wisent/cli/wisent_cli/util/parsing.py +126 -0
  54. wisent/cli/wisent_cli/version.py +4 -0
  55. wisent/core/__init__.py +27 -0
  56. wisent/core/activations/__init__.py +0 -0
  57. wisent/core/activations/activations_collector.py +338 -0
  58. wisent/core/activations/core/__init__.py +0 -0
  59. wisent/core/activations/core/atoms.py +216 -0
  60. wisent/core/agent/__init__.py +18 -0
  61. wisent/core/agent/budget.py +638 -0
  62. wisent/core/agent/device_benchmarks.py +685 -0
  63. wisent/core/agent/diagnose/__init__.py +55 -0
  64. wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
  65. wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
  66. wisent/core/agent/diagnose/create_classifier.py +1154 -0
  67. wisent/core/agent/diagnose/response_diagnostics.py +268 -0
  68. wisent/core/agent/diagnose/select_classifiers.py +506 -0
  69. wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
  70. wisent/core/agent/diagnose/tasks/__init__.py +33 -0
  71. wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
  72. wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
  73. wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
  74. wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
  75. wisent/core/agent/diagnose.py +242 -0
  76. wisent/core/agent/steer.py +212 -0
  77. wisent/core/agent/timeout.py +134 -0
  78. wisent/core/autonomous_agent.py +1234 -0
  79. wisent/core/bigcode_integration.py +583 -0
  80. wisent/core/contrastive_pairs/__init__.py +15 -0
  81. wisent/core/contrastive_pairs/core/__init__.py +0 -0
  82. wisent/core/contrastive_pairs/core/atoms.py +45 -0
  83. wisent/core/contrastive_pairs/core/buliders.py +59 -0
  84. wisent/core/contrastive_pairs/core/pair.py +178 -0
  85. wisent/core/contrastive_pairs/core/response.py +152 -0
  86. wisent/core/contrastive_pairs/core/serialization.py +300 -0
  87. wisent/core/contrastive_pairs/core/set.py +133 -0
  88. wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
  89. wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
  90. wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
  91. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
  92. wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
  93. wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
  94. wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
  95. wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
  96. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
  102. wisent/core/data_loaders/__init__.py +0 -0
  103. wisent/core/data_loaders/core/__init__.py +0 -0
  104. wisent/core/data_loaders/core/atoms.py +98 -0
  105. wisent/core/data_loaders/loaders/__init__.py +0 -0
  106. wisent/core/data_loaders/loaders/custom.py +120 -0
  107. wisent/core/data_loaders/loaders/lm_loader.py +218 -0
  108. wisent/core/detection_handling.py +257 -0
  109. wisent/core/download_full_benchmarks.py +1386 -0
  110. wisent/core/evaluators/__init__.py +0 -0
  111. wisent/core/evaluators/oracles/__init__.py +0 -0
  112. wisent/core/evaluators/oracles/interactive.py +73 -0
  113. wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
  114. wisent/core/evaluators/oracles/user_specified.py +67 -0
  115. wisent/core/hyperparameter_optimizer.py +429 -0
  116. wisent/core/lm_eval_harness_ground_truth.py +1396 -0
  117. wisent/core/log_likelihoods_evaluator.py +321 -0
  118. wisent/core/managed_cached_benchmarks.py +595 -0
  119. wisent/core/mixed_benchmark_sampler.py +364 -0
  120. wisent/core/model_config_manager.py +330 -0
  121. wisent/core/model_persistence.py +317 -0
  122. wisent/core/models/__init__.py +0 -0
  123. wisent/core/models/core/__init__.py +0 -0
  124. wisent/core/models/core/atoms.py +460 -0
  125. wisent/core/models/wisent_model.py +727 -0
  126. wisent/core/multi_steering.py +316 -0
  127. wisent/core/optuna/__init__.py +57 -0
  128. wisent/core/optuna/classifier/__init__.py +25 -0
  129. wisent/core/optuna/classifier/activation_generator.py +349 -0
  130. wisent/core/optuna/classifier/classifier_cache.py +509 -0
  131. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
  132. wisent/core/optuna/steering/__init__.py +0 -0
  133. wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
  134. wisent/core/optuna/steering/data_utils.py +342 -0
  135. wisent/core/optuna/steering/metrics.py +474 -0
  136. wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
  137. wisent/core/optuna/steering/steering_optimization.py +1111 -0
  138. wisent/core/parser.py +1668 -0
  139. wisent/core/prompts/__init__.py +0 -0
  140. wisent/core/prompts/core/__init__.py +0 -0
  141. wisent/core/prompts/core/atom.py +57 -0
  142. wisent/core/prompts/core/prompt_formater.py +157 -0
  143. wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
  144. wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
  145. wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
  146. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
  147. wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
  148. wisent/core/representation.py +5 -0
  149. wisent/core/sample_size_optimizer.py +648 -0
  150. wisent/core/sample_size_optimizer_v2.py +355 -0
  151. wisent/core/save_results.py +277 -0
  152. wisent/core/steering.py +652 -0
  153. wisent/core/steering_method.py +26 -0
  154. wisent/core/steering_methods/__init__.py +0 -0
  155. wisent/core/steering_methods/core/__init__.py +0 -0
  156. wisent/core/steering_methods/core/atoms.py +153 -0
  157. wisent/core/steering_methods/methods/__init__.py +0 -0
  158. wisent/core/steering_methods/methods/caa.py +44 -0
  159. wisent/core/steering_optimizer.py +1297 -0
  160. wisent/core/task_interface.py +132 -0
  161. wisent/core/task_selector.py +189 -0
  162. wisent/core/tasks/__init__.py +175 -0
  163. wisent/core/tasks/aime_task.py +141 -0
  164. wisent/core/tasks/file_task.py +211 -0
  165. wisent/core/tasks/hle_task.py +180 -0
  166. wisent/core/tasks/hmmt_task.py +119 -0
  167. wisent/core/tasks/livecodebench_task.py +201 -0
  168. wisent/core/tasks/livemathbench_task.py +158 -0
  169. wisent/core/tasks/lm_eval_task.py +455 -0
  170. wisent/core/tasks/math500_task.py +84 -0
  171. wisent/core/tasks/polymath_task.py +146 -0
  172. wisent/core/tasks/supergpqa_task.py +220 -0
  173. wisent/core/time_estimator.py +149 -0
  174. wisent/core/timing_calibration.py +174 -0
  175. wisent/core/tracking/__init__.py +54 -0
  176. wisent/core/tracking/latency.py +618 -0
  177. wisent/core/tracking/memory.py +359 -0
  178. wisent/core/trainers/__init__.py +0 -0
  179. wisent/core/trainers/core/__init__.py +11 -0
  180. wisent/core/trainers/core/atoms.py +45 -0
  181. wisent/core/trainers/steering_trainer.py +271 -0
  182. wisent/core/user_model_config.py +158 -0
  183. wisent/opti/__init__.py +0 -0
  184. wisent/opti/core/__init__.py +0 -0
  185. wisent/opti/core/atoms.py +175 -0
  186. wisent/opti/methods/__init__.py +0 -0
  187. wisent/opti/methods/opti_classificator.py +172 -0
  188. wisent/opti/methods/opti_steering.py +138 -0
  189. wisent/synthetic/__init__.py +0 -0
  190. wisent/synthetic/cleaners/__init__.py +0 -0
  191. wisent/synthetic/cleaners/core/__init__.py +0 -0
  192. wisent/synthetic/cleaners/core/atoms.py +58 -0
  193. wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
  194. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  195. wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
  196. wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
  197. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  198. wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
  199. wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
  200. wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
  201. wisent/synthetic/db_instructions/__init__.py +0 -0
  202. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  203. wisent/synthetic/db_instructions/core/atoms.py +25 -0
  204. wisent/synthetic/db_instructions/mini_dp.py +37 -0
  205. wisent/synthetic/generators/__init__.py +0 -0
  206. wisent/synthetic/generators/core/__init__.py +0 -0
  207. wisent/synthetic/generators/core/atoms.py +73 -0
  208. wisent/synthetic/generators/diversities/__init__.py +0 -0
  209. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  210. wisent/synthetic/generators/diversities/core/core.py +68 -0
  211. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  212. wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
  213. wisent/synthetic/generators/pairs_generator.py +179 -0
  214. wisent-0.5.2.dist-info/METADATA +67 -0
  215. wisent-0.5.2.dist-info/RECORD +218 -0
  216. {wisent-0.1.1.dist-info → wisent-0.5.2.dist-info}/WHEEL +1 -1
  217. {wisent-0.1.1.dist-info → wisent-0.5.2.dist-info/licenses}/LICENSE +2 -2
  218. wisent/activations/__init__.py +0 -9
  219. wisent/activations/client.py +0 -97
  220. wisent/activations/extractor.py +0 -251
  221. wisent/activations/models.py +0 -95
  222. wisent/client.py +0 -45
  223. wisent/control_vector/__init__.py +0 -9
  224. wisent/control_vector/client.py +0 -85
  225. wisent/control_vector/manager.py +0 -168
  226. wisent/control_vector/models.py +0 -70
  227. wisent/inference/__init__.py +0 -9
  228. wisent/inference/client.py +0 -103
  229. wisent/inference/inferencer.py +0 -250
  230. wisent/inference/models.py +0 -66
  231. wisent/utils/__init__.py +0 -3
  232. wisent/utils/auth.py +0 -30
  233. wisent/utils/http.py +0 -228
  234. wisent/version.py +0 -3
  235. wisent-0.1.1.dist-info/METADATA +0 -142
  236. wisent-0.1.1.dist-info/RECORD +0 -23
  237. {wisent-0.1.1.dist-info → wisent-0.5.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,212 @@
1
+ """
2
+ Steering module for autonomous agent response improvement.
3
+
4
+ This module handles:
5
+ - Response improvement strategies
6
+ - Steering vector generation
7
+ - Regeneration with improved prompts
8
+ - Training data creation for corrections
9
+ """
10
+
11
+ from dataclasses import dataclass
12
+ from typing import List, Dict, Any, Callable, Awaitable
13
+ from .diagnose import AnalysisResult
14
+
15
+
16
@dataclass
class ImprovementResult:
    """Outcome record for a single attempt at improving a response.

    Instances are plain value objects: field order below is also the
    positional-argument order of the generated ``__init__``.
    """

    # Response text as it was before the improvement attempt.
    original_response: str
    # Response text produced by the improvement attempt.
    improved_response: str
    # Label of the strategy that produced ``improved_response``.
    improvement_method: str
    # Whether the attempt was judged successful by its producer.
    success: bool
    # Numeric score describing how much the response improved.
    improvement_score: float
24
+
25
+
26
class ResponseSteering:
    """Handles response improvement and steering for autonomous agents.

    Two strategies are available:

    * ``"regenerate"`` - re-ask the model with an extra instruction appended
      to the prompt.
    * ``"steering"`` - re-ask the model with synthetic wrong/correct example
      pairs prepended to the prompt (a prompt-based stand-in for real
      steering vectors).
    """

    # Issue labels that are routed to the "steering" strategy.
    _FACTUAL_ISSUES = frozenset({"scientific_myth", "factual_error_population"})

    # A quality-score gain above this threshold counts as success by itself.
    _MIN_QUALITY_GAIN = 0.05

    # Number of synthetic contrastive pairs requested per detected issue.
    _PAIRS_PER_ISSUE = 5

    # Trait description handed to the pair generator for each known issue.
    _TRAIT_DESCRIPTIONS = {
        "scientific_myth": "providing scientifically accurate information",
        "factual_error_population": "giving correct population statistics",
        "quality": "providing clear, accurate responses",
        "harmful": "giving safe, helpful advice",
        "bias": "responding with fairness and avoiding stereotypes",
        "coherence": "maintaining logical communication",
    }

    def __init__(self, generate_response_func: Callable[[str], Awaitable[str]],
                 analyze_response_func: Callable[[str, str], Awaitable["AnalysisResult"]],
                 model: Any = None):
        """
        Initialize the steering system.

        Args:
            generate_response_func: Async function to generate new responses
            analyze_response_func: Async function to analyze responses; it is
                called as ``analyze_response_func(response, prompt)``
            model: Object passed to the synthetic contrastive-pair generator.
                Only needed for the "steering" strategy. It may also be
                assigned later via the ``model`` attribute.
        """
        self.generate_response = generate_response_func
        self.analyze_response = analyze_response_func
        # Previously never assigned, which made create_steering_training_data
        # fail on every call.
        self.model = model

    async def improve_response(self, prompt: str, response: str,
                               analysis: "AnalysisResult") -> "ImprovementResult":
        """Attempt to improve the response.

        Args:
            prompt: The prompt that produced ``response``.
            response: The response to improve.
            analysis: Analysis of ``response``; its ``issues_found`` select
                the strategy.

        Returns:
            The result of the selected improvement strategy.

        Raises:
            ValueError: If no strategy exists for the detected issues.
        """
        method = self.choose_improvement_method(analysis.issues_found)

        if method == "regenerate":
            return await self.improve_by_regeneration(prompt, response, analysis)
        if method == "steering":
            return await self.improve_by_steering(prompt, response, analysis)
        raise ValueError(f"Unknown improvement method: {method}")

    def choose_improvement_method(self, issues: List[str]) -> str:
        """Choose the best improvement method for the issues.

        Factual issues take precedence over repetition.

        Returns:
            ``"steering"`` or ``"regenerate"``.

        Raises:
            ValueError: If none of the issues has an associated method.
        """
        if any(issue in self._FACTUAL_ISSUES for issue in issues):
            return "steering"  # Use steering for factual issues
        if "excessive_repetition" in issues:
            return "regenerate"  # Regenerate for repetition
        raise ValueError(f"No improvement method available for issues: {issues}")

    async def improve_by_regeneration(self, prompt: str, response: str,
                                      analysis: "AnalysisResult") -> "ImprovementResult":
        """Improve by regenerating with modified prompt.

        The new response is analyzed against the *original* prompt, not the
        modified one.
        """
        improved_prompt = (
            f"{prompt}\n\nPlease ensure your response is factually accurate and avoids repetition."
        )

        new_response = await self.generate_response(improved_prompt)
        new_analysis = await self.analyze_response(new_response, prompt)

        return self._assess_improvement(
            response, new_response, analysis, new_analysis, "regeneration"
        )

    async def improve_by_steering(self, prompt: str, response: str,
                                  analysis: "AnalysisResult") -> "ImprovementResult":
        """Improve using steering vectors.

        For now this uses a prompt-based approach instead of actual steering:
        the synthetic training pairs are rendered as wrong/correct examples
        and placed in front of the original prompt.

        Raises:
            ValueError: If training data cannot be generated.
        """
        training_data = self.create_steering_training_data(analysis.issues_found)

        corrections_text = "\n\n".join(
            f"Wrong: {pair['harmful']}\nCorrect: {pair['harmless']}"
            for pair in training_data
        )

        improved_prompt = (
            "Based on these correction examples:\n"
            f"{corrections_text}\n"
            "\n"
            "Now please respond to this prompt with factual accuracy:\n"
            f"{prompt}\n"
            "\n"
            "Ensure your response avoids the types of errors shown in the correction examples above."
        )

        new_response = await self.generate_response(improved_prompt)
        new_analysis = await self.analyze_response(new_response, prompt)

        return self._assess_improvement(
            response, new_response, analysis, new_analysis, "steering"
        )

    def _assess_improvement(self, response: str, new_response: str,
                            analysis: "AnalysisResult", new_analysis: "AnalysisResult",
                            method_label: str) -> "ImprovementResult":
        """Compare the old and new analyses and build the result record.

        The attempt succeeds when more issues were resolved than newly
        introduced, OR when the quality score rose by more than
        ``_MIN_QUALITY_GAIN``. The reported score is never negative.
        """
        improvement_score = max(0.0, new_analysis.quality_score - analysis.quality_score)

        original_issues = set(analysis.issues_found)
        new_issues = set(new_analysis.issues_found)
        issues_resolved = len(original_issues - new_issues)
        issues_added = len(new_issues - original_issues)

        issue_resolution_success = issues_resolved > issues_added
        quality_improvement_success = improvement_score > self._MIN_QUALITY_GAIN

        return ImprovementResult(
            original_response=response,
            improved_response=new_response,
            improvement_method=method_label,
            success=issue_resolution_success or quality_improvement_success,
            improvement_score=improvement_score,
        )

    def create_steering_training_data(self, issues: List[str]) -> List[Dict[str, str]]:
        """Create dynamic steering training data based on detected issues.

        Args:
            issues: Issue labels; each yields up to ``_PAIRS_PER_ISSUE`` pairs.

        Returns:
            A list of ``{"harmful": ..., "harmless": ...}`` dicts built from
            the negative/positive responses of the generated pairs.

        Raises:
            ValueError: If no model is configured, if generation fails, or if
                no pairs were produced.
        """
        model = getattr(self, "model", None)
        if model is None:
            # Fail fast with an actionable message instead of an opaque
            # wrapped AttributeError.
            raise ValueError(
                f"Failed to generate training data for issues {issues}: "
                "no model configured (pass model= to ResponseSteering or set .model)"
            )

        # NOTE(review): no generate_synthetically module is visible under
        # wisent/core/contrastive_pairs in this release's file listing -
        # confirm that this import target actually ships.
        from ..contrastive_pairs.generate_synthetically import SyntheticContrastivePairGenerator

        training_pairs: List[Dict[str, str]] = []
        try:
            generator = SyntheticContrastivePairGenerator(model)

            for issue in issues:
                trait_description = self._TRAIT_DESCRIPTIONS.get(
                    issue,
                    f"avoiding {issue} issues in responses"
                )

                synthetic_pairs = generator.generate_contrastive_pair_set(
                    trait_description=trait_description,
                    num_pairs=self._PAIRS_PER_ISSUE,
                    name=f"steering_{issue}"
                )

                training_pairs.extend(
                    {
                        "harmful": pair.negative_response,
                        "harmless": pair.positive_response,
                    }
                    for pair in synthetic_pairs.pairs
                )
        except Exception as e:
            # Keep the original cause attached for debugging.
            raise ValueError(
                f"Failed to generate training data for issues {issues}: {e}"
            ) from e

        if not training_pairs:
            raise ValueError(f"Could not generate training data for issues: {issues}")

        return training_pairs
@@ -0,0 +1,134 @@
1
+ """
2
+ Timeout management for wisent-guard agent operations.
3
+
4
+ This module provides hard timeout enforcement to ensure operations
5
+ don't exceed their allocated time budgets.
6
+ """
7
+
8
+ import asyncio
9
+ import time
10
+ from typing import Optional, Any
11
+ from contextlib import asynccontextmanager
12
+
13
+
14
class TimeoutError(Exception):
    """Signals that an operation ran past its time budget.

    NOTE: inside this module the name shadows Python's built-in
    ``TimeoutError``; this class derives directly from ``Exception``.

    Attributes:
        elapsed_time: Elapsed-time value supplied by the raiser.
        budget_time: Budget value supplied by the raiser.
    """

    def __init__(self, message: str, elapsed_time: float, budget_time: float):
        # Only the message is forwarded to Exception, so ``args`` holds just it.
        super().__init__(message)
        self.budget_time = budget_time
        self.elapsed_time = elapsed_time
21
+
22
+
23
class TimeoutManager:
    """Manages hard timeouts for agent operations.

    Enforcement is cooperative: nothing is interrupted automatically, the
    running code must call :meth:`check_timeout` (or poll
    :meth:`is_expired`) itself.

    All expiry decisions use ``time.monotonic()``, so adjustments to the
    system clock cannot shorten or extend the budget. ``start_time`` and
    ``deadline`` remain wall-clock (``time.time()``) stamps recorded at
    :meth:`start` and are informational only.
    """

    def __init__(self, budget_minutes: float):
        """
        Args:
            budget_minutes: Time budget in minutes.
        """
        self.budget_seconds = budget_minutes * 60.0
        self.start_time = None
        self.deadline = None
        # Monotonic reading taken at start(); None until the timer is started.
        self._started_monotonic = None

    def start(self):
        """Start (or restart) the timeout timer."""
        self.start_time = time.time()
        self.deadline = self.start_time + self.budget_seconds
        self._started_monotonic = time.monotonic()

    def check_timeout(self):
        """Check if we've exceeded the timeout. Raises TimeoutError if so.

        Does nothing when the timer has not been started. The expiry
        condition is the same one :meth:`is_expired` uses.

        Raises:
            TimeoutError: If the elapsed time has reached the budget.
        """
        if self._started_monotonic is None:
            return  # Not started yet

        elapsed = self.get_elapsed_time()
        if elapsed >= self.budget_seconds:
            raise TimeoutError(
                f"Operation exceeded time budget of {self.budget_seconds:.1f}s (elapsed: {elapsed:.1f}s)",
                elapsed_time=elapsed,
                budget_time=self.budget_seconds
            )

    def get_remaining_time(self) -> float:
        """Get remaining time in seconds. Returns 0 if expired.

        Before :meth:`start` is called the full budget is returned.
        """
        if self._started_monotonic is None:
            return self.budget_seconds

        return max(0.0, self.budget_seconds - self.get_elapsed_time())

    def get_elapsed_time(self) -> float:
        """Get elapsed time in seconds (0.0 if the timer was never started)."""
        if self._started_monotonic is None:
            return 0.0
        return time.monotonic() - self._started_monotonic

    def is_expired(self) -> bool:
        """Check if the timeout has expired."""
        return self.get_remaining_time() <= 0
69
+
70
+
71
@asynccontextmanager
async def timeout_context(budget_minutes: float):
    """
    Async context manager that provides a started TimeoutManager.

    The manager is created and started on entry and yielded to the body.
    The context itself does not interrupt anything: the body has to call
    ``check_timeout()`` periodically. When a TimeoutError escapes the body,
    a message is printed and the exception is re-raised.

    Usage:
        async with timeout_context(5.0) as timeout_mgr:
            # Your operation here
            timeout_mgr.check_timeout()  # Call periodically
    """
    manager = TimeoutManager(budget_minutes)
    manager.start()

    try:
        yield manager
    except TimeoutError:
        # Report, then let the caller handle the timeout.
        elapsed = manager.get_elapsed_time()
        print(f"⏰ Operation timed out after {elapsed:.1f}s (budget: {budget_minutes:.1f}min)")
        raise
89
+
90
+
91
def with_timeout(budget_minutes: float):
    """
    Decorator that adds timeout enforcement to async functions.

    The decorated coroutine runs inside ``timeout_context(budget_minutes)``.
    If the wrapped function declares a ``timeout_mgr`` parameter, the active
    manager is passed in under that keyword (overriding any value the caller
    supplied) so the function can call ``check_timeout()`` itself.

    Args:
        budget_minutes: Time budget, in minutes, for each call.

    Returns:
        A decorator for async functions.

    Usage:
        @with_timeout(5.0)
        async def my_operation():
            # Your code here
    """
    import functools
    import inspect

    def decorator(func):
        # Inspect once at decoration time instead of on every call.
        accepts_timeout_mgr = 'timeout_mgr' in inspect.signature(func).parameters

        @functools.wraps(func)  # keep __name__, __doc__, etc. of the wrapped function
        async def wrapper(*args, **kwargs):
            async with timeout_context(budget_minutes) as timeout_mgr:
                # Inject timeout manager into function if it accepts it
                if accepts_timeout_mgr:
                    kwargs['timeout_mgr'] = timeout_mgr

                return await func(*args, **kwargs)
        return wrapper
    return decorator
112
+
113
+
114
class AsyncTimeoutChecker:
    """
    Helper class for checking timeouts in long-running async operations.

    Counts operations and calls ``timeout_mgr.check_timeout()`` once every
    ``check_interval`` operations.
    """

    def __init__(self, timeout_mgr: "TimeoutManager", check_interval: int = 10):
        """
        Args:
            timeout_mgr: Manager whose ``check_timeout()`` is invoked.
            check_interval: Number of ticks between timeout checks (>= 1).

        Raises:
            ValueError: If ``check_interval`` is less than 1.
        """
        if check_interval < 1:
            # A zero interval would otherwise surface as ZeroDivisionError
            # on the first tick().
            raise ValueError(f"check_interval must be >= 1, got {check_interval}")
        self.timeout_mgr = timeout_mgr
        self.check_interval = check_interval
        self.operation_count = 0

    def tick(self):
        """Call this on each iteration/operation. Checks timeout periodically.

        Propagates whatever ``timeout_mgr.check_timeout()`` raises.
        """
        self.operation_count += 1
        if self.operation_count % self.check_interval == 0:
            self.timeout_mgr.check_timeout()

    async def async_tick(self):
        """Async version that checks the timeout and then yields control."""
        self.tick()
        await asyncio.sleep(0)  # Yield control