wisent 0.1.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic. Click here for more details.
- wisent/__init__.py +1 -8
- wisent/benchmarks/__init__.py +0 -0
- wisent/benchmarks/coding/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
- wisent/benchmarks/coding/metrics/evaluator.py +275 -0
- wisent/benchmarks/coding/metrics/passk.py +66 -0
- wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
- wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
- wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
- wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
- wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
- wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
- wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
- wisent/benchmarks/coding/providers/__init__.py +18 -0
- wisent/benchmarks/coding/providers/core/__init__.py +0 -0
- wisent/benchmarks/coding/providers/core/atoms.py +31 -0
- wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
- wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
- wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
- wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
- wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
- wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
- wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
- wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
- wisent/classifiers/__init__.py +0 -0
- wisent/classifiers/core/__init__.py +0 -0
- wisent/classifiers/core/atoms.py +747 -0
- wisent/classifiers/models/__init__.py +0 -0
- wisent/classifiers/models/logistic.py +29 -0
- wisent/classifiers/models/mlp.py +47 -0
- wisent/cli/__init__.py +0 -0
- wisent/cli/classifiers/__init__.py +0 -0
- wisent/cli/classifiers/classifier_rotator.py +137 -0
- wisent/cli/cli_logger.py +142 -0
- wisent/cli/data_loaders/__init__.py +0 -0
- wisent/cli/data_loaders/data_loader_rotator.py +96 -0
- wisent/cli/evaluators/__init__.py +0 -0
- wisent/cli/evaluators/evaluator_rotator.py +148 -0
- wisent/cli/steering_methods/__init__.py +0 -0
- wisent/cli/steering_methods/steering_rotator.py +110 -0
- wisent/cli/wisent_cli/__init__.py +0 -0
- wisent/cli/wisent_cli/commands/__init__.py +0 -0
- wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
- wisent/cli/wisent_cli/commands/listing.py +154 -0
- wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
- wisent/cli/wisent_cli/main.py +93 -0
- wisent/cli/wisent_cli/shell.py +80 -0
- wisent/cli/wisent_cli/ui.py +69 -0
- wisent/cli/wisent_cli/util/__init__.py +0 -0
- wisent/cli/wisent_cli/util/aggregations.py +43 -0
- wisent/cli/wisent_cli/util/parsing.py +126 -0
- wisent/cli/wisent_cli/version.py +4 -0
- wisent/core/__init__.py +27 -0
- wisent/core/activations/__init__.py +0 -0
- wisent/core/activations/activations_collector.py +338 -0
- wisent/core/activations/core/__init__.py +0 -0
- wisent/core/activations/core/atoms.py +216 -0
- wisent/core/agent/__init__.py +18 -0
- wisent/core/agent/budget.py +638 -0
- wisent/core/agent/device_benchmarks.py +685 -0
- wisent/core/agent/diagnose/__init__.py +55 -0
- wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
- wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
- wisent/core/agent/diagnose/create_classifier.py +1154 -0
- wisent/core/agent/diagnose/response_diagnostics.py +268 -0
- wisent/core/agent/diagnose/select_classifiers.py +506 -0
- wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
- wisent/core/agent/diagnose/tasks/__init__.py +33 -0
- wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
- wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
- wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
- wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
- wisent/core/agent/diagnose.py +242 -0
- wisent/core/agent/steer.py +212 -0
- wisent/core/agent/timeout.py +134 -0
- wisent/core/autonomous_agent.py +1234 -0
- wisent/core/bigcode_integration.py +583 -0
- wisent/core/contrastive_pairs/__init__.py +15 -0
- wisent/core/contrastive_pairs/core/__init__.py +0 -0
- wisent/core/contrastive_pairs/core/atoms.py +45 -0
- wisent/core/contrastive_pairs/core/buliders.py +59 -0
- wisent/core/contrastive_pairs/core/pair.py +178 -0
- wisent/core/contrastive_pairs/core/response.py +152 -0
- wisent/core/contrastive_pairs/core/serialization.py +300 -0
- wisent/core/contrastive_pairs/core/set.py +133 -0
- wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
- wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
- wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
- wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
- wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
- wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
- wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
- wisent/core/data_loaders/__init__.py +0 -0
- wisent/core/data_loaders/core/__init__.py +0 -0
- wisent/core/data_loaders/core/atoms.py +98 -0
- wisent/core/data_loaders/loaders/__init__.py +0 -0
- wisent/core/data_loaders/loaders/custom.py +120 -0
- wisent/core/data_loaders/loaders/lm_loader.py +218 -0
- wisent/core/detection_handling.py +257 -0
- wisent/core/download_full_benchmarks.py +1386 -0
- wisent/core/evaluators/__init__.py +0 -0
- wisent/core/evaluators/oracles/__init__.py +0 -0
- wisent/core/evaluators/oracles/interactive.py +73 -0
- wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
- wisent/core/evaluators/oracles/user_specified.py +67 -0
- wisent/core/hyperparameter_optimizer.py +429 -0
- wisent/core/lm_eval_harness_ground_truth.py +1396 -0
- wisent/core/log_likelihoods_evaluator.py +321 -0
- wisent/core/managed_cached_benchmarks.py +595 -0
- wisent/core/mixed_benchmark_sampler.py +364 -0
- wisent/core/model_config_manager.py +330 -0
- wisent/core/model_persistence.py +317 -0
- wisent/core/models/__init__.py +0 -0
- wisent/core/models/core/__init__.py +0 -0
- wisent/core/models/core/atoms.py +460 -0
- wisent/core/models/wisent_model.py +727 -0
- wisent/core/multi_steering.py +316 -0
- wisent/core/optuna/__init__.py +57 -0
- wisent/core/optuna/classifier/__init__.py +25 -0
- wisent/core/optuna/classifier/activation_generator.py +349 -0
- wisent/core/optuna/classifier/classifier_cache.py +509 -0
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
- wisent/core/optuna/steering/__init__.py +0 -0
- wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
- wisent/core/optuna/steering/data_utils.py +342 -0
- wisent/core/optuna/steering/metrics.py +474 -0
- wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
- wisent/core/optuna/steering/steering_optimization.py +1111 -0
- wisent/core/parser.py +1668 -0
- wisent/core/prompts/__init__.py +0 -0
- wisent/core/prompts/core/__init__.py +0 -0
- wisent/core/prompts/core/atom.py +57 -0
- wisent/core/prompts/core/prompt_formater.py +157 -0
- wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
- wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
- wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
- wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
- wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
- wisent/core/representation.py +5 -0
- wisent/core/sample_size_optimizer.py +648 -0
- wisent/core/sample_size_optimizer_v2.py +355 -0
- wisent/core/save_results.py +277 -0
- wisent/core/steering.py +652 -0
- wisent/core/steering_method.py +26 -0
- wisent/core/steering_methods/__init__.py +0 -0
- wisent/core/steering_methods/core/__init__.py +0 -0
- wisent/core/steering_methods/core/atoms.py +153 -0
- wisent/core/steering_methods/methods/__init__.py +0 -0
- wisent/core/steering_methods/methods/caa.py +44 -0
- wisent/core/steering_optimizer.py +1297 -0
- wisent/core/task_interface.py +132 -0
- wisent/core/task_selector.py +189 -0
- wisent/core/tasks/__init__.py +175 -0
- wisent/core/tasks/aime_task.py +141 -0
- wisent/core/tasks/file_task.py +211 -0
- wisent/core/tasks/hle_task.py +180 -0
- wisent/core/tasks/hmmt_task.py +119 -0
- wisent/core/tasks/livecodebench_task.py +201 -0
- wisent/core/tasks/livemathbench_task.py +158 -0
- wisent/core/tasks/lm_eval_task.py +455 -0
- wisent/core/tasks/math500_task.py +84 -0
- wisent/core/tasks/polymath_task.py +146 -0
- wisent/core/tasks/supergpqa_task.py +220 -0
- wisent/core/time_estimator.py +149 -0
- wisent/core/timing_calibration.py +174 -0
- wisent/core/tracking/__init__.py +54 -0
- wisent/core/tracking/latency.py +618 -0
- wisent/core/tracking/memory.py +359 -0
- wisent/core/trainers/__init__.py +0 -0
- wisent/core/trainers/core/__init__.py +11 -0
- wisent/core/trainers/core/atoms.py +45 -0
- wisent/core/trainers/steering_trainer.py +271 -0
- wisent/core/user_model_config.py +158 -0
- wisent/opti/__init__.py +0 -0
- wisent/opti/core/__init__.py +0 -0
- wisent/opti/core/atoms.py +175 -0
- wisent/opti/methods/__init__.py +0 -0
- wisent/opti/methods/opti_classificator.py +172 -0
- wisent/opti/methods/opti_steering.py +138 -0
- wisent/synthetic/__init__.py +0 -0
- wisent/synthetic/cleaners/__init__.py +0 -0
- wisent/synthetic/cleaners/core/__init__.py +0 -0
- wisent/synthetic/cleaners/core/atoms.py +58 -0
- wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
- wisent/synthetic/cleaners/methods/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
- wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
- wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
- wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
- wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
- wisent/synthetic/db_instructions/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/atoms.py +25 -0
- wisent/synthetic/db_instructions/mini_dp.py +37 -0
- wisent/synthetic/generators/__init__.py +0 -0
- wisent/synthetic/generators/core/__init__.py +0 -0
- wisent/synthetic/generators/core/atoms.py +73 -0
- wisent/synthetic/generators/diversities/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/core.py +68 -0
- wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
- wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
- wisent/synthetic/generators/pairs_generator.py +179 -0
- wisent-0.5.2.dist-info/METADATA +67 -0
- wisent-0.5.2.dist-info/RECORD +218 -0
- {wisent-0.1.1.dist-info → wisent-0.5.2.dist-info}/WHEEL +1 -1
- {wisent-0.1.1.dist-info → wisent-0.5.2.dist-info/licenses}/LICENSE +2 -2
- wisent/activations/__init__.py +0 -9
- wisent/activations/client.py +0 -97
- wisent/activations/extractor.py +0 -251
- wisent/activations/models.py +0 -95
- wisent/client.py +0 -45
- wisent/control_vector/__init__.py +0 -9
- wisent/control_vector/client.py +0 -85
- wisent/control_vector/manager.py +0 -168
- wisent/control_vector/models.py +0 -70
- wisent/inference/__init__.py +0 -9
- wisent/inference/client.py +0 -103
- wisent/inference/inferencer.py +0 -250
- wisent/inference/models.py +0 -66
- wisent/utils/__init__.py +0 -3
- wisent/utils/auth.py +0 -30
- wisent/utils/http.py +0 -228
- wisent/version.py +0 -3
- wisent-0.1.1.dist-info/METADATA +0 -142
- wisent-0.1.1.dist-info/RECORD +0 -23
- {wisent-0.1.1.dist-info → wisent-0.5.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Steering module for autonomous agent response improvement.
|
|
3
|
+
|
|
4
|
+
This module handles:
|
|
5
|
+
- Response improvement strategies
|
|
6
|
+
- Steering vector generation
|
|
7
|
+
- Regeneration with improved prompts
|
|
8
|
+
- Training data creation for corrections
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from typing import List, Dict, Any, Callable, Awaitable
|
|
13
|
+
from .diagnose import AnalysisResult
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class ImprovementResult:
    """Outcome of a single attempt to improve a model response.

    Instances are produced by the improvement strategies in this module and
    carry both versions of the response plus how the attempt was scored.
    """

    # Response text before the improvement attempt.
    original_response: str
    # Response text produced by the improvement attempt.
    improved_response: str
    # Label of the strategy that produced ``improved_response``.
    improvement_method: str
    # Whether the attempt was judged successful by the strategy.
    success: bool
    # Quality gain attributed to the attempt.
    improvement_score: float
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ResponseSteering:
    """Handles response improvement and steering for autonomous agents.

    Two strategies are available: regenerating with an amended prompt, and a
    prompt-based stand-in for steering that prepends synthetic correction
    examples. Both re-analyze the new response and report the outcome as an
    ``ImprovementResult``.
    """

    # Set to True to print per-attempt diagnostics while assessing a response.
    DEBUG = False

    # Minimum quality-score gain that counts as a success on its own.
    MIN_QUALITY_GAIN = 0.05

    # Number of synthetic contrastive pairs requested per detected issue.
    PAIRS_PER_ISSUE = 5

    # Issues that are routed to the steering strategy.
    _FACTUAL_ISSUES = ("scientific_myth", "factual_error_population")

    # Trait descriptions handed to the synthetic pair generator, keyed by issue.
    _TRAIT_DESCRIPTIONS = {
        "scientific_myth": "providing scientifically accurate information",
        "factual_error_population": "giving correct population statistics",
        "quality": "providing clear, accurate responses",
        "harmful": "giving safe, helpful advice",
        "bias": "responding with fairness and avoiding stereotypes",
        "coherence": "maintaining logical communication",
    }

    def __init__(self, generate_response_func: Callable[[str], Awaitable[str]],
                 analyze_response_func: Callable[[str, str], Awaitable["AnalysisResult"]],
                 model: Any = None):
        """
        Initialize the steering system.

        Args:
            generate_response_func: Async function to generate new responses
            analyze_response_func: Async function to analyze responses; called
                as ``analyze_response_func(response, prompt)``
            model: Object passed to the synthetic contrastive pair generator.
                Optional; only required by ``create_steering_training_data``
                (and therefore by ``improve_by_steering``).
        """
        self.generate_response = generate_response_func
        self.analyze_response = analyze_response_func
        # Previously never assigned, which made every steering attempt fail
        # with an AttributeError wrapped in a ValueError.
        self.model = model

    async def improve_response(self, prompt: str, response: str, analysis: "AnalysisResult") -> "ImprovementResult":
        """Attempt to improve the response.

        Picks a strategy from ``analysis.issues_found`` and delegates to it.

        Raises:
            ValueError: If no strategy applies to the detected issues.
        """
        method = self.choose_improvement_method(analysis.issues_found)

        if method == "regenerate":
            return await self.improve_by_regeneration(prompt, response, analysis)
        if method == "steering":
            return await self.improve_by_steering(prompt, response, analysis)
        raise ValueError(f"Unknown improvement method: {method}")

    def choose_improvement_method(self, issues: List[str]) -> str:
        """Choose the best improvement method for the issues.

        Returns:
            ``"steering"`` when any factual issue is present, otherwise
            ``"regenerate"`` when ``"excessive_repetition"`` is present.

        Raises:
            ValueError: If none of the issues maps to a strategy.
        """
        if any(issue in self._FACTUAL_ISSUES for issue in issues):
            return "steering"  # Use steering for factual issues
        if "excessive_repetition" in issues:
            return "regenerate"  # Regenerate for repetition
        raise ValueError(f"No improvement method available for issues: {issues}")

    async def improve_by_regeneration(self, prompt: str, response: str, analysis: "AnalysisResult") -> "ImprovementResult":
        """Improve by regenerating with modified prompt."""
        improved_prompt = f"{prompt}\n\nPlease ensure your response is factually accurate and avoids repetition."
        new_response = await self.generate_response(improved_prompt)
        return await self._assess_improvement(
            prompt, response, new_response, analysis,
            method="regeneration", debug_label="Regeneration",
        )

    async def improve_by_steering(self, prompt: str, response: str, analysis: "AnalysisResult") -> "ImprovementResult":
        """Improve using steering vectors.

        For now this is a prompt-based approximation: synthetic wrong/correct
        pairs are generated for the detected issues and prepended to the
        prompt as correction examples; no steering vector is applied.
        """
        training_data = self.create_steering_training_data(analysis.issues_found)

        corrections_text = "\n\n".join(
            f"Wrong: {pair['harmful']}\nCorrect: {pair['harmless']}"
            for pair in training_data
        )

        improved_prompt = (
            "Based on these correction examples:\n"
            f"{corrections_text}\n"
            "\n"
            "Now please respond to this prompt with factual accuracy:\n"
            f"{prompt}\n"
            "\n"
            "Ensure your response avoids the types of errors shown in the correction examples above."
        )

        new_response = await self.generate_response(improved_prompt)
        return await self._assess_improvement(
            prompt, response, new_response, analysis,
            method="steering", debug_label="Steering",
        )

    async def _assess_improvement(self, prompt: str, response: str, new_response: str,
                                  analysis: "AnalysisResult", *, method: str,
                                  debug_label: str) -> "ImprovementResult":
        """Re-analyze ``new_response`` and score it against the original analysis."""
        # The analyzer is given the original prompt, not the amended one.
        new_analysis = await self.analyze_response(new_response, prompt)
        # Regressions are clamped to zero rather than reported as negative.
        improvement_score = max(0, new_analysis.quality_score - analysis.quality_score)

        original_issues = set(analysis.issues_found)
        new_issues = set(new_analysis.issues_found)
        issues_resolved = len(original_issues - new_issues)
        issues_added = len(new_issues - original_issues)

        # Success if issues were resolved OR quality improved significantly
        issue_resolution_success = issues_resolved > issues_added
        quality_improvement_success = improvement_score > self.MIN_QUALITY_GAIN
        overall_success = issue_resolution_success or quality_improvement_success

        if self.DEBUG:
            print(f" 🔧 {debug_label} debug:")
            print(f" Original quality: {analysis.quality_score:.3f}")
            print(f" New quality: {new_analysis.quality_score:.3f}")
            print(f" Improvement score: {improvement_score:.3f}")
            print(f" Original issues: {original_issues}")
            print(f" New issues: {new_issues}")
            print(f" Issues resolved: {issues_resolved}")
            print(f" Issues added: {issues_added}")
            print(f" Issue resolution success: {issue_resolution_success}")
            print(f" Quality improvement success: {quality_improvement_success}")
            print(f" Overall success: {overall_success}")

        return ImprovementResult(
            original_response=response,
            improved_response=new_response,
            improvement_method=method,
            success=overall_success,
            improvement_score=improvement_score,
        )

    def create_steering_training_data(self, issues: List[str]) -> List[Dict[str, str]]:
        """Create dynamic steering training data based on detected issues.

        Returns:
            A list of ``{"harmful": ..., "harmless": ...}`` dicts built from
            the generated pairs' negative and positive responses.

        Raises:
            ValueError: If no model was supplied, if generation fails, or if
                no pairs were produced.
        """
        model = getattr(self, "model", None)
        if model is None:
            raise ValueError(
                f"Cannot generate training data for issues {issues}: "
                "no model was provided to ResponseSteering"
            )

        # NOTE(review): module path kept from the original code — confirm that
        # contrastive_pairs.generate_synthetically ships with the package.
        from ..contrastive_pairs.generate_synthetically import SyntheticContrastivePairGenerator

        training_pairs = []
        try:
            generator = SyntheticContrastivePairGenerator(model)
            for issue in issues:
                trait_description = self._TRAIT_DESCRIPTIONS.get(
                    issue,
                    f"avoiding {issue} issues in responses",
                )
                synthetic_pairs = generator.generate_contrastive_pair_set(
                    trait_description=trait_description,
                    num_pairs=self.PAIRS_PER_ISSUE,
                    name=f"steering_{issue}",
                )
                training_pairs.extend(
                    {
                        "harmful": pair.negative_response,
                        "harmless": pair.positive_response,
                    }
                    for pair in synthetic_pairs.pairs
                )
        except Exception as e:
            raise ValueError(f"Failed to generate training data for issues {issues}: {e}") from e

        # Checked outside the try so this error is not re-wrapped by the
        # handler above.
        if not training_pairs:
            raise ValueError(f"Could not generate training data for issues: {issues}")

        return training_pairs
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Timeout management for wisent-guard agent operations.
|
|
3
|
+
|
|
4
|
+
This module provides hard timeout enforcement to ensure operations
|
|
5
|
+
don't exceed their allocated time budgets.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import time
|
|
10
|
+
from typing import Optional, Any
|
|
11
|
+
from contextlib import asynccontextmanager
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TimeoutError(Exception):
    """Raised when an operation exceeds its time budget.

    NOTE: this class shadows the builtin ``TimeoutError`` inside this module
    and is unrelated to it; an ``except TimeoutError`` written here does not
    catch the builtin one.

    Attributes:
        elapsed_time: Seconds elapsed when the timeout was detected.
        budget_time: Time budget in seconds that was exceeded.
    """

    def __init__(self, message: str, elapsed_time: float, budget_time: float):
        super().__init__(message)
        self.elapsed_time = elapsed_time
        self.budget_time = budget_time

    def __reduce__(self):
        """Support ``copy`` and ``pickle``.

        The default exception reduction rebuilds the object from ``args``,
        which only holds the message, so reconstruction would fail on the two
        missing required arguments.
        """
        message = self.args[0] if self.args else ""
        return (
            self.__class__,
            (message, self.elapsed_time, self.budget_time),
            dict(vars(self)),
        )
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TimeoutManager:
    """Manages timeouts for agent operations.

    Enforcement is cooperative: nothing is interrupted automatically; callers
    must invoke ``check_timeout()`` (or poll ``is_expired()``) periodically.

    Attributes:
        budget_seconds: Time budget converted to seconds.
        start_time: Wall-clock ``time.time()`` at ``start()``, or None before
            it. Informational only.
        deadline: ``start_time + budget_seconds``, or None before ``start()``.
            Informational only.
    """

    def __init__(self, budget_minutes: float):
        self.budget_seconds = budget_minutes * 60.0
        self.start_time = None
        self.deadline = None
        # Monotonic readings used for the actual enforcement so that
        # wall-clock adjustments cannot shorten or extend the budget.
        self._started_mono = None
        self._deadline_mono = None

    def start(self):
        """Start the timeout timer."""
        self.start_time = time.time()
        self.deadline = self.start_time + self.budget_seconds
        self._started_mono = time.monotonic()
        self._deadline_mono = self._started_mono + self.budget_seconds

    def check_timeout(self):
        """Check if we've exceeded the timeout. Raises TimeoutError if so.

        Does nothing when the timer has not been started. Raises exactly when
        ``is_expired()`` would return True.
        """
        if self._started_mono is None:
            return  # Not started yet

        now = time.monotonic()
        if now >= self._deadline_mono:
            elapsed = now - self._started_mono
            raise TimeoutError(
                f"Operation exceeded time budget of {self.budget_seconds:.1f}s (elapsed: {elapsed:.1f}s)",
                elapsed_time=elapsed,
                budget_time=self.budget_seconds
            )

    def get_remaining_time(self) -> float:
        """Get remaining time in seconds. Returns 0 if expired.

        Before ``start()`` the full budget is reported.
        """
        if self._started_mono is None:
            return self.budget_seconds
        return max(0.0, self._deadline_mono - time.monotonic())

    def get_elapsed_time(self) -> float:
        """Get elapsed time in seconds (0.0 before ``start()``)."""
        if self._started_mono is None:
            return 0.0
        return time.monotonic() - self._started_mono

    def is_expired(self) -> bool:
        """Check if the timeout has expired."""
        return self.get_remaining_time() <= 0
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@asynccontextmanager
async def timeout_context(budget_minutes: float):
    """
    Async context manager that hands out a started ``TimeoutManager``.

    The body is not interrupted automatically; it must call
    ``check_timeout()`` on the yielded manager for the budget to be enforced.
    If this module's ``TimeoutError`` propagates out of the body, a notice is
    printed and the exception is re-raised unchanged.

    Usage:
        async with timeout_context(5.0) as timeout_mgr:
            # Your operation here
            timeout_mgr.check_timeout()  # Call periodically
    """
    manager = TimeoutManager(budget_minutes)
    manager.start()

    try:
        yield manager
    except TimeoutError:
        elapsed_seconds = manager.get_elapsed_time()
        print(f"⏰ Operation timed out after {elapsed_seconds:.1f}s (budget: {budget_minutes:.1f}min)")
        raise
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def with_timeout(budget_minutes: float):
    """
    Decorator that adds timeout enforcement to async functions.

    The decorated coroutine runs inside ``timeout_context(budget_minutes)``.
    If the function declares a ``timeout_mgr`` parameter, the active manager
    is passed in as that keyword argument (overriding any caller-supplied
    keyword of the same name) so the function can call ``check_timeout()``.

    Usage:
        @with_timeout(5.0)
        async def my_operation():
            # Your code here
    """
    import functools
    import inspect

    def decorator(func):
        # Inspect the signature once at decoration time instead of per call.
        accepts_timeout_mgr = 'timeout_mgr' in inspect.signature(func).parameters

        # Preserve the wrapped function's name, docstring and signature.
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            async with timeout_context(budget_minutes) as timeout_mgr:
                # Inject timeout manager into function if it accepts it
                if accepts_timeout_mgr:
                    kwargs['timeout_mgr'] = timeout_mgr

                return await func(*args, **kwargs)
        return wrapper
    return decorator
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class AsyncTimeoutChecker:
    """
    Helper class for checking timeouts in long-running async operations.

    Counts operations and calls ``timeout_mgr.check_timeout()`` on every
    ``check_interval``-th one, so tight loops do not pay for a clock read on
    each iteration.
    """

    def __init__(self, timeout_mgr: "TimeoutManager", check_interval: int = 10):
        """
        Args:
            timeout_mgr: Manager whose ``check_timeout()`` is invoked.
            check_interval: Number of ticks between checks; must be >= 1.

        Raises:
            ValueError: If ``check_interval`` is less than 1.
        """
        # Reject bad intervals up front; 0 would otherwise surface later as a
        # ZeroDivisionError on the first tick().
        if check_interval < 1:
            raise ValueError(f"check_interval must be >= 1, got {check_interval}")
        self.timeout_mgr = timeout_mgr
        self.check_interval = check_interval
        self.operation_count = 0

    def tick(self):
        """Call this on each iteration/operation. Checks timeout periodically."""
        self.operation_count += 1
        if self.operation_count % self.check_interval == 0:
            self.timeout_mgr.check_timeout()

    async def async_tick(self):
        """Async version that yields control and checks timeout."""
        self.tick()
        await asyncio.sleep(0)  # Yield control
|