wisent 0.1.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic. Click here for more details.
- wisent/__init__.py +1 -8
- wisent/benchmarks/__init__.py +0 -0
- wisent/benchmarks/coding/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
- wisent/benchmarks/coding/metrics/evaluator.py +275 -0
- wisent/benchmarks/coding/metrics/passk.py +66 -0
- wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
- wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
- wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
- wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
- wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
- wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
- wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
- wisent/benchmarks/coding/providers/__init__.py +18 -0
- wisent/benchmarks/coding/providers/core/__init__.py +0 -0
- wisent/benchmarks/coding/providers/core/atoms.py +31 -0
- wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
- wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
- wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
- wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
- wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
- wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
- wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
- wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
- wisent/classifiers/__init__.py +0 -0
- wisent/classifiers/core/__init__.py +0 -0
- wisent/classifiers/core/atoms.py +747 -0
- wisent/classifiers/models/__init__.py +0 -0
- wisent/classifiers/models/logistic.py +29 -0
- wisent/classifiers/models/mlp.py +47 -0
- wisent/cli/__init__.py +0 -0
- wisent/cli/classifiers/__init__.py +0 -0
- wisent/cli/classifiers/classifier_rotator.py +137 -0
- wisent/cli/cli_logger.py +142 -0
- wisent/cli/data_loaders/__init__.py +0 -0
- wisent/cli/data_loaders/data_loader_rotator.py +96 -0
- wisent/cli/evaluators/__init__.py +0 -0
- wisent/cli/evaluators/evaluator_rotator.py +148 -0
- wisent/cli/steering_methods/__init__.py +0 -0
- wisent/cli/steering_methods/steering_rotator.py +110 -0
- wisent/cli/wisent_cli/__init__.py +0 -0
- wisent/cli/wisent_cli/commands/__init__.py +0 -0
- wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
- wisent/cli/wisent_cli/commands/listing.py +154 -0
- wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
- wisent/cli/wisent_cli/main.py +93 -0
- wisent/cli/wisent_cli/shell.py +80 -0
- wisent/cli/wisent_cli/ui.py +69 -0
- wisent/cli/wisent_cli/util/__init__.py +0 -0
- wisent/cli/wisent_cli/util/aggregations.py +43 -0
- wisent/cli/wisent_cli/util/parsing.py +126 -0
- wisent/cli/wisent_cli/version.py +4 -0
- wisent/core/__init__.py +27 -0
- wisent/core/activations/__init__.py +0 -0
- wisent/core/activations/activations_collector.py +338 -0
- wisent/core/activations/core/__init__.py +0 -0
- wisent/core/activations/core/atoms.py +216 -0
- wisent/core/agent/__init__.py +18 -0
- wisent/core/agent/budget.py +638 -0
- wisent/core/agent/device_benchmarks.py +685 -0
- wisent/core/agent/diagnose/__init__.py +55 -0
- wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
- wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
- wisent/core/agent/diagnose/create_classifier.py +1154 -0
- wisent/core/agent/diagnose/response_diagnostics.py +268 -0
- wisent/core/agent/diagnose/select_classifiers.py +506 -0
- wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
- wisent/core/agent/diagnose/tasks/__init__.py +33 -0
- wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
- wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
- wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
- wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
- wisent/core/agent/diagnose.py +242 -0
- wisent/core/agent/steer.py +212 -0
- wisent/core/agent/timeout.py +134 -0
- wisent/core/autonomous_agent.py +1234 -0
- wisent/core/bigcode_integration.py +583 -0
- wisent/core/contrastive_pairs/__init__.py +15 -0
- wisent/core/contrastive_pairs/core/__init__.py +0 -0
- wisent/core/contrastive_pairs/core/atoms.py +45 -0
- wisent/core/contrastive_pairs/core/buliders.py +59 -0
- wisent/core/contrastive_pairs/core/pair.py +178 -0
- wisent/core/contrastive_pairs/core/response.py +152 -0
- wisent/core/contrastive_pairs/core/serialization.py +300 -0
- wisent/core/contrastive_pairs/core/set.py +133 -0
- wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
- wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
- wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
- wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
- wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
- wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
- wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
- wisent/core/data_loaders/__init__.py +0 -0
- wisent/core/data_loaders/core/__init__.py +0 -0
- wisent/core/data_loaders/core/atoms.py +98 -0
- wisent/core/data_loaders/loaders/__init__.py +0 -0
- wisent/core/data_loaders/loaders/custom.py +120 -0
- wisent/core/data_loaders/loaders/lm_loader.py +218 -0
- wisent/core/detection_handling.py +257 -0
- wisent/core/download_full_benchmarks.py +1386 -0
- wisent/core/evaluators/__init__.py +0 -0
- wisent/core/evaluators/oracles/__init__.py +0 -0
- wisent/core/evaluators/oracles/interactive.py +73 -0
- wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
- wisent/core/evaluators/oracles/user_specified.py +67 -0
- wisent/core/hyperparameter_optimizer.py +429 -0
- wisent/core/lm_eval_harness_ground_truth.py +1396 -0
- wisent/core/log_likelihoods_evaluator.py +321 -0
- wisent/core/managed_cached_benchmarks.py +595 -0
- wisent/core/mixed_benchmark_sampler.py +364 -0
- wisent/core/model_config_manager.py +330 -0
- wisent/core/model_persistence.py +317 -0
- wisent/core/models/__init__.py +0 -0
- wisent/core/models/core/__init__.py +0 -0
- wisent/core/models/core/atoms.py +460 -0
- wisent/core/models/wisent_model.py +727 -0
- wisent/core/multi_steering.py +316 -0
- wisent/core/optuna/__init__.py +57 -0
- wisent/core/optuna/classifier/__init__.py +25 -0
- wisent/core/optuna/classifier/activation_generator.py +349 -0
- wisent/core/optuna/classifier/classifier_cache.py +509 -0
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
- wisent/core/optuna/steering/__init__.py +0 -0
- wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
- wisent/core/optuna/steering/data_utils.py +342 -0
- wisent/core/optuna/steering/metrics.py +474 -0
- wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
- wisent/core/optuna/steering/steering_optimization.py +1111 -0
- wisent/core/parser.py +1668 -0
- wisent/core/prompts/__init__.py +0 -0
- wisent/core/prompts/core/__init__.py +0 -0
- wisent/core/prompts/core/atom.py +57 -0
- wisent/core/prompts/core/prompt_formater.py +157 -0
- wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
- wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
- wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
- wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
- wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
- wisent/core/representation.py +5 -0
- wisent/core/sample_size_optimizer.py +648 -0
- wisent/core/sample_size_optimizer_v2.py +355 -0
- wisent/core/save_results.py +277 -0
- wisent/core/steering.py +652 -0
- wisent/core/steering_method.py +26 -0
- wisent/core/steering_methods/__init__.py +0 -0
- wisent/core/steering_methods/core/__init__.py +0 -0
- wisent/core/steering_methods/core/atoms.py +153 -0
- wisent/core/steering_methods/methods/__init__.py +0 -0
- wisent/core/steering_methods/methods/caa.py +44 -0
- wisent/core/steering_optimizer.py +1297 -0
- wisent/core/task_interface.py +132 -0
- wisent/core/task_selector.py +189 -0
- wisent/core/tasks/__init__.py +175 -0
- wisent/core/tasks/aime_task.py +141 -0
- wisent/core/tasks/file_task.py +211 -0
- wisent/core/tasks/hle_task.py +180 -0
- wisent/core/tasks/hmmt_task.py +119 -0
- wisent/core/tasks/livecodebench_task.py +201 -0
- wisent/core/tasks/livemathbench_task.py +158 -0
- wisent/core/tasks/lm_eval_task.py +455 -0
- wisent/core/tasks/math500_task.py +84 -0
- wisent/core/tasks/polymath_task.py +146 -0
- wisent/core/tasks/supergpqa_task.py +220 -0
- wisent/core/time_estimator.py +149 -0
- wisent/core/timing_calibration.py +174 -0
- wisent/core/tracking/__init__.py +54 -0
- wisent/core/tracking/latency.py +618 -0
- wisent/core/tracking/memory.py +359 -0
- wisent/core/trainers/__init__.py +0 -0
- wisent/core/trainers/core/__init__.py +11 -0
- wisent/core/trainers/core/atoms.py +45 -0
- wisent/core/trainers/steering_trainer.py +271 -0
- wisent/core/user_model_config.py +158 -0
- wisent/opti/__init__.py +0 -0
- wisent/opti/core/__init__.py +0 -0
- wisent/opti/core/atoms.py +175 -0
- wisent/opti/methods/__init__.py +0 -0
- wisent/opti/methods/opti_classificator.py +172 -0
- wisent/opti/methods/opti_steering.py +138 -0
- wisent/synthetic/__init__.py +0 -0
- wisent/synthetic/cleaners/__init__.py +0 -0
- wisent/synthetic/cleaners/core/__init__.py +0 -0
- wisent/synthetic/cleaners/core/atoms.py +58 -0
- wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
- wisent/synthetic/cleaners/methods/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
- wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
- wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
- wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
- wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
- wisent/synthetic/db_instructions/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/atoms.py +25 -0
- wisent/synthetic/db_instructions/mini_dp.py +37 -0
- wisent/synthetic/generators/__init__.py +0 -0
- wisent/synthetic/generators/core/__init__.py +0 -0
- wisent/synthetic/generators/core/atoms.py +73 -0
- wisent/synthetic/generators/diversities/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/core.py +68 -0
- wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
- wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
- wisent/synthetic/generators/pairs_generator.py +179 -0
- wisent-0.5.1.dist-info/METADATA +67 -0
- wisent-0.5.1.dist-info/RECORD +218 -0
- {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/WHEEL +1 -1
- {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info/licenses}/LICENSE +2 -2
- wisent/activations/__init__.py +0 -9
- wisent/activations/client.py +0 -97
- wisent/activations/extractor.py +0 -251
- wisent/activations/models.py +0 -95
- wisent/client.py +0 -45
- wisent/control_vector/__init__.py +0 -9
- wisent/control_vector/client.py +0 -85
- wisent/control_vector/manager.py +0 -168
- wisent/control_vector/models.py +0 -70
- wisent/inference/__init__.py +0 -9
- wisent/inference/client.py +0 -103
- wisent/inference/inferencer.py +0 -250
- wisent/inference/models.py +0 -66
- wisent/utils/__init__.py +0 -3
- wisent/utils/auth.py +0 -30
- wisent/utils/http.py +0 -228
- wisent/version.py +0 -3
- wisent-0.1.1.dist-info/METADATA +0 -142
- wisent-0.1.1.dist-info/RECORD +0 -23
- {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Classifier Selection System for Autonomous Agent
|
|
3
|
+
|
|
4
|
+
This module handles:
|
|
5
|
+
- Auto-discovery of existing trained classifiers
|
|
6
|
+
- Intelligent selection of classifiers based on task requirements
|
|
7
|
+
- Performance-based classifier ranking and filtering
|
|
8
|
+
- Model-specific classifier matching
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import glob
|
|
13
|
+
import json
|
|
14
|
+
import pickle
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Dict, List, Any, Optional, Tuple
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from ...model_persistence import ModelPersistence
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class ClassifierInfo:
    """Record describing one classifier file found on disk.

    Attributes:
        path: Location of the classifier file, as it was discovered.
        layer: Layer number associated with the classifier.
        issue_type: Label of the issue the classifier is meant to detect.
        threshold: Detection threshold to use with this classifier.
        metadata: Free-form metadata gathered for the classifier.
        performance_score: Ranking score; higher is better. Defaults to 0.0.
    """

    # Required fields (positional order is part of the constructor contract).
    path: str
    layer: int
    issue_type: str
    threshold: float
    metadata: Dict[str, Any]

    # Optional field.
    performance_score: float = 0.0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class SelectionCriteria:
    """Constraints describing which classifiers a caller wants selected.

    Attributes:
        required_issue_types: Issue types that must each be covered.
        preferred_layers: Layers to restrict the choice to; ``None`` means
            no layer restriction.
        min_performance_score: Lowest acceptable performance score.
        max_classifiers: Upper bound on how many classifiers to return.
        model_name: Name of the target model, if known.
        task_type: Optional task type label.
    """

    # The only mandatory field.
    required_issue_types: List[str]

    # Optional filters and limits.
    preferred_layers: Optional[List[int]] = None
    min_performance_score: float = 0.0
    max_classifiers: int = 10
    model_name: Optional[str] = None
    task_type: Optional[str] = None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ClassifierSelector:
    """Discover trained classifier pickles on disk and select the best ones.

    Typical use is ``discover_classifiers()`` followed by
    ``select_classifiers(criteria)``; the latter runs discovery itself when
    nothing has been discovered yet.
    """

    # Layer assumed when a file name / path carries no parsable layer number.
    _DEFAULT_LAYER = 15

    def __init__(self, search_paths: Optional[List[str]] = None):
        """
        Initialize the classifier selector.

        Args:
            search_paths: Directories to search for classifiers. ``None`` or an
                empty list falls back to a set of common relative locations.
        """
        self.search_paths = search_paths or [
            "./models",
            "./optimization_results",
            "./trained_classifiers",
            "./examples/models",
            ".",  # Current directory
        ]
        self.discovered_classifiers: List["ClassifierInfo"] = []

    def discover_classifiers(self) -> List["ClassifierInfo"]:
        """
        Auto-discover all available trained classifiers.

        Every existing search path is globbed recursively with a handful of
        file-name patterns. Each distinct file is analysed once; files that
        cannot be analysed are reported and left out.

        Returns:
            List of discovered classifier information, sorted by performance
            score (highest first). The same list is stored on
            ``self.discovered_classifiers``.
        """
        print("🔍 Discovering available classifiers...")

        self.discovered_classifiers = []

        # The patterns overlap (and so can the search paths), so one file is
        # usually matched several times.
        patterns = (
            "**/*_classifier.pkl",
            "**/*classifier*.pkl",
            "**/classifier_layer_*.pkl",
            "**/trained_classifier_*.pkl",
            "**/*_layer_*.pkl",
        )

        visited = set()  # normalised paths already analysed
        found = []

        for search_path in self.search_paths:
            if not os.path.exists(search_path):
                continue

            print(f" Searching in: {search_path}")

            for pattern in patterns:
                matches = glob.glob(os.path.join(search_path, pattern), recursive=True)
                for filepath in matches:
                    # De-duplicate BEFORE analysing: analysis unpickles the
                    # file, which is expensive and prints warnings on failure.
                    key = os.path.normpath(filepath)
                    if key in visited:
                        continue
                    visited.add(key)

                    classifier_info = self._analyze_classifier_file(filepath)
                    if classifier_info:
                        found.append(classifier_info)

        print(f" ✅ Discovered {len(found)} classifiers")

        # Stable sort: ties keep discovery order.
        found.sort(key=lambda info: info.performance_score, reverse=True)
        self.discovered_classifiers = found

        return self.discovered_classifiers

    def _analyze_classifier_file(self, filepath: str) -> Optional["ClassifierInfo"]:
        """
        Analyze a classifier file and extract information.

        Args:
            filepath: Path to classifier file

        Returns:
            ClassifierInfo if valid, None otherwise (the failure is printed).
        """
        try:
            layer, issue_type = self._parse_classifier_filename(filepath)
            metadata = self._load_classifier_metadata(filepath)
            performance_score = self._calculate_performance_score(metadata)

            return ClassifierInfo(
                path=filepath,
                layer=layer,
                issue_type=issue_type,
                # Fall back to 0.5 when the metadata carries no threshold.
                threshold=metadata.get('detection_threshold', 0.5),
                metadata=metadata,
                performance_score=performance_score,
            )

        except Exception as e:
            # Discovery is best-effort: one bad file must not abort the scan.
            print(f" ⚠️ Failed to analyze {filepath}: {e}")
            return None

    def _parse_classifier_filename(self, filepath: str) -> Tuple[int, str]:
        """
        Parse classifier filename to extract layer and issue type.

        Recognised shapes, checked in this order:

        * ``<prefix>classifier_layer_<N>[_...].pkl`` -> layer ``N``; the issue
          type is everything before ``_layer`` (e.g. ``classifier`` or
          ``hallucination_classifier``).
        * ``trained_classifier_<issue>_layer_<N>.pkl`` -> layer ``N``, issue
          type ``<issue>``.
        * ``<issue>_classifier.pkl`` / ``<issue>_<model>_classifier.pkl`` ->
          default layer; the last underscore-separated token is dropped when
          there is more than one.
        * anything else -> issue type ``"unknown"``; the layer is taken from
          the last path component that contains ``layer`` and ends in
          ``_<N>``, otherwise the default layer.

        Args:
            filepath: Path to classifier file

        Returns:
            Tuple of (layer, issue_type)

        Raises:
            ValueError: If a recognised pattern carries a non-numeric layer.
        """
        filename = os.path.basename(filepath)

        # Pattern: classifier_layer_X_*.pkl
        if "classifier_layer_" in filename:
            parts = filename.split("_")
            # "_layer_" is delimited by underscores, so "layer" is always a part
            # and is always followed by at least one more part.
            layer_pos = parts.index("layer")
            # The layer token may be the last one and still carry the file
            # extension ("5.pkl"); keep only what precedes the first dot.
            layer = int(parts[layer_pos + 1].split(".")[0])
            issue_type = "_".join(parts[:layer_pos])
            return layer, issue_type

        # Pattern: trained_classifier_*_layer_X.pkl
        if "trained_classifier_" in filename and "_layer_" in filename:
            layer_part = filename.split("_layer_")[-1]
            layer = int(layer_part.split(".")[0])
            issue_type = filename.split("trained_classifier_")[1].split("_layer_")[0]
            return layer, issue_type

        # Pattern: issue_type_classifier.pkl or issue_type_model_classifier.pkl
        if "_classifier" in filename:
            parts = filename.replace("_classifier.pkl", "").split("_")
            issue_type = "_".join(parts[:-1]) if len(parts) > 1 else parts[0]
            return self._DEFAULT_LAYER, issue_type

        # Fallback: extract from path structure
        layer = self._DEFAULT_LAYER
        path_parts = Path(filepath).parts
        # Drop the extension of the file name itself so "foo_layer_3.pkl"
        # yields 3; directory names are used unchanged.
        candidates = path_parts[:-1] + (os.path.splitext(filename)[0],)
        for part in candidates:
            if "layer" in part.lower():
                try:
                    layer = int(part.split("_")[-1])
                except ValueError:
                    pass  # not a trailing number; keep the current value

        return layer, "unknown"

    def _load_classifier_metadata(self, filepath: str) -> Dict[str, Any]:
        """
        Load classifier metadata if available.

        Metadata embedded in the pickle (under the ``'metadata'`` key of a
        dict payload) is read first, then overlaid with the first readable
        sidecar JSON file among ``<name>_metadata.json``, ``<name>.json`` and
        ``metadata.json`` in the same directory.

        Args:
            filepath: Path to classifier file

        Returns:
            Metadata dictionary (empty when nothing could be loaded)
        """
        metadata: Dict[str, Any] = {}

        try:
            # NOTE(security): pickle.load executes arbitrary code from the
            # file. Only point search_paths at directories you trust.
            with open(filepath, 'rb') as f:
                data = pickle.load(f)

            if isinstance(data, dict):
                embedded = data.get('metadata', {})
                # Ignore a non-dict payload instead of failing later on.
                if isinstance(embedded, dict):
                    metadata = dict(embedded)

        except Exception as e:
            # Unpickling can fail in many ways (missing modules, truncated
            # file, ...); report and carry on with sidecar files.
            print(f" ⚠️ Skipping corrupted classifier file {filepath}: {e}")

        # Only the extension is swapped; str.replace would also rewrite any
        # ".pkl" appearing in a directory name.
        base, _ext = os.path.splitext(filepath)
        metadata_paths = [
            base + '_metadata.json',
            base + '.json',
            os.path.join(os.path.dirname(filepath), 'metadata.json'),
        ]

        for metadata_path in metadata_paths:
            if not os.path.exists(metadata_path):
                continue
            try:
                with open(metadata_path, 'r') as f:
                    file_metadata = json.load(f)
                metadata.update(file_metadata)
                break  # first usable sidecar wins
            except (OSError, ValueError, TypeError):
                # Unreadable / invalid JSON / not a mapping: try the next one.
                continue

        return metadata

    @staticmethod
    def _as_number(value: Any) -> float:
        """Return ``value`` as a float, or 0.0 when it is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def _calculate_performance_score(self, metadata: Dict[str, Any]) -> float:
        """
        Calculate a performance score for the classifier.

        The score is ``0.6 * f1`` when an F1 value is present (``'f1'`` or
        ``'training_f1'``), otherwise ``0.4 * accuracy`` (``'accuracy'`` or
        ``'training_accuracy'``); plus up to 0.2 for ``'training_samples'``
        (one thousandth of the count) and 0.1 when ``'created_at'`` (ISO
        format) is less than 30 days old. Missing or non-numeric values
        count as zero.

        Args:
            metadata: Classifier metadata

        Returns:
            Performance score, capped at 1.0
        """
        from datetime import datetime

        score = 0.0

        # Base score from F1 or accuracy
        f1_score = self._as_number(metadata.get('f1', metadata.get('training_f1', 0.0)))
        accuracy = self._as_number(metadata.get('accuracy', metadata.get('training_accuracy', 0.0)))

        if f1_score > 0:
            score += f1_score * 0.6
        elif accuracy > 0:
            score += accuracy * 0.4

        # Bonus for larger training sets
        training_samples = self._as_number(metadata.get('training_samples', 0))
        if training_samples > 0:
            score += min(training_samples / 1000, 0.2)  # Max 0.2 bonus

        # Bonus for recent training
        if 'created_at' in metadata:
            try:
                created_at = datetime.fromisoformat(metadata['created_at'])
                # Use the timestamp's own tzinfo so naive and aware values
                # are both handled (naive minus aware raises TypeError).
                now = datetime.now(tz=created_at.tzinfo)
                if (now - created_at).days < 30:
                    score += 0.1
            except (TypeError, ValueError):
                pass  # unparsable timestamp: no bonus

        return min(score, 1.0)  # Cap at 1.0

    @staticmethod
    def _to_config(classifier: "ClassifierInfo") -> Dict[str, Any]:
        """Build the configuration dict returned to callers for one classifier."""
        return {
            "path": classifier.path,
            "layer": classifier.layer,
            "issue_type": classifier.issue_type,
            "threshold": classifier.threshold,
        }

    def select_classifiers(self, criteria: "SelectionCriteria") -> List[Dict[str, Any]]:
        """
        Select the best classifiers based on criteria.

        One classifier is chosen for every required issue type; remaining
        slots (up to ``criteria.max_classifiers``) are filled with other
        discovered classifiers in ranking order.

        Args:
            criteria: Selection criteria

        Returns:
            List of classifier configurations ready for use; each is a dict
            with ``path``, ``layer``, ``issue_type`` and ``threshold``.

        Raises:
            ValueError: If a required issue type has no suitable classifier.
        """
        print(f"🎯 Selecting classifiers for: {criteria.required_issue_types}")

        # Ensure we've discovered classifiers
        if not self.discovered_classifiers:
            self.discover_classifiers()

        selected_classifiers: List[Dict[str, Any]] = []

        for issue_type in criteria.required_issue_types:
            best_classifier = self._find_best_classifier_for_issue_type(issue_type, criteria)

            if not best_classifier:
                print(f" ❌ No classifier found for {issue_type}")
                raise ValueError(f"No suitable classifier found for issue type: {issue_type}")

            selected_classifiers.append(self._to_config(best_classifier))
            print(f" ✅ Selected for {issue_type}: {os.path.basename(best_classifier.path)} "
                  f"(layer {best_classifier.layer}, score: {best_classifier.performance_score:.3f})")

        # Add additional high-performing classifiers if space allows
        if len(selected_classifiers) < criteria.max_classifiers:
            self._add_supplementary_classifiers(selected_classifiers, criteria)

        print(f" 📊 Final selection: {len(selected_classifiers)} classifiers")
        return selected_classifiers

    def _find_best_classifier_for_issue_type(
        self,
        issue_type: str,
        criteria: "SelectionCriteria"
    ) -> Optional["ClassifierInfo"]:
        """
        Find the best classifier for a specific issue type.

        A classifier is a candidate when its issue type equals, contains or
        is contained in ``issue_type``, its score reaches
        ``criteria.min_performance_score``, its layer is allowed by
        ``criteria.preferred_layers`` (if set) and it is compatible with
        ``criteria.model_name``.

        Args:
            issue_type: The issue type to find a classifier for
            criteria: Selection criteria

        Returns:
            Best matching classifier (highest performance score) or None
        """
        candidates = []

        for classifier in self.discovered_classifiers:
            name = classifier.issue_type
            # Exact or partial (substring, either direction) match.
            if not (name == issue_type or issue_type in name or name in issue_type):
                continue
            if not classifier.performance_score >= criteria.min_performance_score:
                continue
            if (criteria.preferred_layers is not None
                    and classifier.layer not in criteria.preferred_layers):
                continue
            if not self._is_model_compatible(classifier, criteria.model_name):
                continue
            candidates.append(classifier)

        return max(candidates, key=lambda x: x.performance_score) if candidates else None

    def _is_model_compatible(self, classifier: "ClassifierInfo", model_name: Optional[str]) -> bool:
        """
        Check if classifier is compatible with the specified model.

        Compatibility is decided on the model family only. A missing target
        model name or a classifier without ``'model_name'`` metadata counts
        as compatible.

        Args:
            classifier: Classifier information
            model_name: Target model name

        Returns:
            True if compatible
        """
        if not model_name:
            return True

        classifier_model = classifier.metadata.get('model_name', '')
        if not classifier_model:
            return True  # No model info available, assume compatible

        # NOTE(review): two models outside the known families both map to
        # 'unknown' and are therefore treated as compatible.
        return self._extract_model_family(model_name) == self._extract_model_family(classifier_model)

    def _extract_model_family(self, model_name: str) -> str:
        """Extract model family from model name (``'unknown'`` if unrecognised)."""
        lowered = model_name.lower()
        # Checked in this order; the first family found in the name wins.
        for family in ('llama', 'mistral', 'gemma', 'qwen'):
            if family in lowered:
                return family
        return 'unknown'

    def _add_supplementary_classifiers(
        self,
        selected_classifiers: List[Dict[str, Any]],
        criteria: "SelectionCriteria"
    ):
        """
        Add supplementary high-performing classifiers if space allows.

        Classifiers are taken in ``self.discovered_classifiers`` order until
        ``criteria.max_classifiers`` is reached. A classifier is skipped when
        it is already selected, scores below
        ``criteria.min_performance_score``, or is not compatible with
        ``criteria.model_name`` (same rule as for required classifiers).

        Args:
            selected_classifiers: Currently selected classifiers (modified in place)
            criteria: Selection criteria
        """
        selected_paths = {config["path"] for config in selected_classifiers}

        for classifier in self.discovered_classifiers:
            if len(selected_classifiers) >= criteria.max_classifiers:
                break

            if classifier.path in selected_paths:
                continue
            if not classifier.performance_score >= criteria.min_performance_score:
                continue
            # A classifier trained for another model family is not a useful
            # extra; required classifiers are already filtered this way.
            if not self._is_model_compatible(classifier, criteria.model_name):
                continue

            selected_classifiers.append(self._to_config(classifier))
            selected_paths.add(classifier.path)
            print(f" ➕ Added supplementary: {os.path.basename(classifier.path)} "
                  f"({classifier.issue_type}, score: {classifier.performance_score:.3f})")

    def get_classifier_summary(self) -> str:
        """
        Get a summary of discovered classifiers.

        Classifiers are grouped by issue type; for each group the three
        highest-scoring classifiers are listed.

        Returns:
            Formatted summary string, or a hint to run discovery first when
            nothing has been discovered.
        """
        if not self.discovered_classifiers:
            return "No classifiers discovered yet. Run discover_classifiers() first."

        # Group by issue type (dict keeps first-seen order)
        by_issue_type: Dict[str, list] = {}
        for classifier in self.discovered_classifiers:
            by_issue_type.setdefault(classifier.issue_type, []).append(classifier)

        pieces = [
            "\n📊 Classifier Discovery Summary\n",
            f"{'='*50}\n",
            f"Total Classifiers: {len(self.discovered_classifiers)}\n\n",
        ]

        for issue_type, classifiers in by_issue_type.items():
            pieces.append(f"{issue_type.upper()}: {len(classifiers)} classifiers\n")
            top_three = sorted(classifiers, key=lambda x: x.performance_score, reverse=True)[:3]
            for classifier in top_three:
                pieces.append(
                    f" • {os.path.basename(classifier.path)} "
                    f"(layer {classifier.layer}, score: {classifier.performance_score:.3f})\n"
                )
            pieces.append("\n")

        return "".join(pieces)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def auto_select_classifiers_for_agent(
    model_name: str,
    required_issue_types: List[str] = None,
    search_paths: List[str] = None,
    max_classifiers: int = 5,
    min_performance: float = 0.1
) -> List[Dict[str, Any]]:
    """
    Pick classifiers for an autonomous agent in a single call.

    Builds a ClassifierSelector over ``search_paths`` and a SelectionCriteria
    from the remaining arguments, then returns whatever
    ``select_classifiers`` yields for them.

    Args:
        model_name: Name of the model being used
        required_issue_types: Issue types that must be covered. When None,
            "hallucination", "quality", "harmful" and "bias" are required.
        search_paths: Custom search paths for classifiers (passed through to
            ClassifierSelector unchanged)
        max_classifiers: Maximum number of classifiers to select
        min_performance: Minimum performance score required

    Returns:
        List of classifier configurations ready for use
    """
    # Only None triggers the defaults; an explicit empty list is kept as is.
    issue_types = (
        required_issue_types
        if required_issue_types is not None
        else ["hallucination", "quality", "harmful", "bias"]
    )

    return ClassifierSelector(search_paths).select_classifiers(
        SelectionCriteria(
            required_issue_types=issue_types,
            max_classifiers=max_classifiers,
            min_performance_score=min_performance,
            model_name=model_name,
        )
    )
|