wisent 0.5.12__py3-none-any.whl → 0.5.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic. Click here for more details.
- wisent/__init__.py +1 -1
- wisent/core/activations/__init__.py +26 -0
- wisent/core/activations/activations.py +96 -0
- wisent/core/activations/activations_collector.py +71 -20
- wisent/core/activations/prompt_construction_strategy.py +47 -0
- wisent/core/agent/__init__.py +1 -18
- wisent/core/agent/budget.py +2 -2
- wisent/core/agent/device_benchmarks.py +1 -1
- wisent/core/agent/diagnose/__init__.py +1 -55
- wisent/core/agent/diagnose/classifier_marketplace.py +8 -8
- wisent/core/agent/diagnose/response_diagnostics.py +4 -4
- wisent/core/agent/diagnose/synthetic_classifier_option.py +1 -1
- wisent/core/agent/diagnose/tasks/task_manager.py +3 -3
- wisent/core/agent/diagnose.py +2 -1
- wisent/core/autonomous_agent.py +10 -2
- wisent/core/benchmark_extractors.py +293 -0
- wisent/core/bigcode_integration.py +20 -7
- wisent/core/branding.py +108 -0
- wisent/core/cli/__init__.py +15 -0
- wisent/core/cli/create_steering_vector.py +138 -0
- wisent/core/cli/evaluate_responses.py +715 -0
- wisent/core/cli/generate_pairs.py +128 -0
- wisent/core/cli/generate_pairs_from_task.py +119 -0
- wisent/core/cli/generate_responses.py +129 -0
- wisent/core/cli/generate_vector_from_synthetic.py +149 -0
- wisent/core/cli/generate_vector_from_task.py +147 -0
- wisent/core/cli/get_activations.py +191 -0
- wisent/core/cli/optimize_classification.py +339 -0
- wisent/core/cli/optimize_steering.py +364 -0
- wisent/core/cli/tasks.py +182 -0
- wisent/core/cli_logger.py +22 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +27 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +49 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +146 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +129 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livecodebench.py +367 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +116 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad2.py +124 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sst2.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +127 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_gen.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +127 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +1 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +1 -1
- wisent/core/data_loaders/__init__.py +235 -0
- wisent/core/data_loaders/loaders/lm_loader.py +2 -2
- wisent/core/data_loaders/loaders/task_interface_loader.py +300 -0
- wisent/{cli/data_loaders/data_loader_rotator.py → core/data_loaders/rotator.py} +1 -1
- wisent/core/download_full_benchmarks.py +79 -2
- wisent/core/evaluators/benchmark_specific/__init__.py +26 -0
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/evaluator.py +17 -17
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/cpp_sanitizer.py +2 -2
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/java_sanitizer.py +2 -2
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/python_sanitizer.py +2 -2
- wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/__init__.py +3 -0
- wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/provider.py +305 -0
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/runtime.py +36 -4
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/entrypoint.py +2 -4
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/recipes.py +1 -1
- wisent/core/evaluators/benchmark_specific/coding/solution_generator.py +258 -0
- wisent/core/evaluators/benchmark_specific/exact_match_evaluator.py +79 -0
- wisent/core/evaluators/benchmark_specific/f1_evaluator.py +101 -0
- wisent/core/evaluators/benchmark_specific/generation_evaluator.py +197 -0
- wisent/core/{log_likelihoods_evaluator.py → evaluators/benchmark_specific/log_likelihoods_evaluator.py} +10 -2
- wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py +140 -0
- wisent/core/evaluators/benchmark_specific/personalization_evaluator.py +250 -0
- wisent/{cli/evaluators/evaluator_rotator.py → core/evaluators/rotator.py} +4 -4
- wisent/core/lm_eval_harness_ground_truth.py +3 -2
- wisent/core/main.py +57 -0
- wisent/core/model_persistence.py +2 -2
- wisent/core/models/wisent_model.py +6 -6
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
- wisent/core/optuna/steering/steering_optimization.py +1 -1
- wisent/core/parser_arguments/__init__.py +10 -0
- wisent/core/parser_arguments/agent_parser.py +110 -0
- wisent/core/parser_arguments/configure_model_parser.py +7 -0
- wisent/core/parser_arguments/create_steering_vector_parser.py +59 -0
- wisent/core/parser_arguments/evaluate_parser.py +40 -0
- wisent/core/parser_arguments/evaluate_responses_parser.py +10 -0
- wisent/core/parser_arguments/full_optimize_parser.py +115 -0
- wisent/core/parser_arguments/generate_pairs_from_task_parser.py +33 -0
- wisent/core/parser_arguments/generate_pairs_parser.py +29 -0
- wisent/core/parser_arguments/generate_responses_parser.py +15 -0
- wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +127 -0
- wisent/core/parser_arguments/generate_vector_from_task_parser.py +127 -0
- wisent/core/parser_arguments/generate_vector_parser.py +90 -0
- wisent/core/parser_arguments/get_activations_parser.py +90 -0
- wisent/core/parser_arguments/main_parser.py +152 -0
- wisent/core/parser_arguments/model_config_parser.py +59 -0
- wisent/core/parser_arguments/monitor_parser.py +17 -0
- wisent/core/parser_arguments/multi_steer_parser.py +47 -0
- wisent/core/parser_arguments/optimize_classification_parser.py +67 -0
- wisent/core/parser_arguments/optimize_sample_size_parser.py +58 -0
- wisent/core/parser_arguments/optimize_steering_parser.py +147 -0
- wisent/core/parser_arguments/synthetic_parser.py +93 -0
- wisent/core/parser_arguments/tasks_parser.py +584 -0
- wisent/core/parser_arguments/test_nonsense_parser.py +26 -0
- wisent/core/parser_arguments/utils.py +111 -0
- wisent/core/prompts/core/prompt_formater.py +3 -3
- wisent/core/prompts/prompt_stratiegies/direct_completion.py +2 -0
- wisent/core/prompts/prompt_stratiegies/instruction_following.py +2 -0
- wisent/core/prompts/prompt_stratiegies/multiple_choice.py +2 -0
- wisent/core/prompts/prompt_stratiegies/role_playing.py +2 -0
- wisent/{cli/steering_methods/steering_rotator.py → core/steering_methods/rotator.py} +4 -4
- wisent/core/steering_optimizer.py +45 -21
- wisent/{synthetic → core/synthetic}/cleaners/deduper_cleaner.py +3 -3
- wisent/{synthetic → core/synthetic}/cleaners/methods/base_dedupers.py +2 -2
- wisent/{synthetic → core/synthetic}/cleaners/methods/base_refusalers.py +1 -1
- wisent/{synthetic → core/synthetic}/cleaners/pairs_cleaner.py +5 -5
- wisent/{synthetic → core/synthetic}/cleaners/refusaler_cleaner.py +4 -4
- wisent/{synthetic → core/synthetic}/db_instructions/mini_dp.py +1 -1
- wisent/{synthetic → core/synthetic}/generators/diversities/methods/fast_diversity.py +1 -1
- wisent/{synthetic → core/synthetic}/generators/pairs_generator.py +38 -12
- wisent/core/tasks/livecodebench_task.py +4 -103
- wisent/core/timing_calibration.py +1 -1
- {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/METADATA +3 -3
- wisent-0.5.14.dist-info/RECORD +294 -0
- wisent-0.5.14.dist-info/entry_points.txt +2 -0
- wisent/benchmarks/coding/providers/livecodebench/provider.py +0 -53
- wisent/classifiers/core/atoms.py +0 -747
- wisent/classifiers/models/logistic.py +0 -29
- wisent/classifiers/models/mlp.py +0 -47
- wisent/cli/classifiers/classifier_rotator.py +0 -137
- wisent/cli/cli_logger.py +0 -142
- wisent/cli/wisent_cli/commands/help_cmd.py +0 -52
- wisent/cli/wisent_cli/commands/listing.py +0 -154
- wisent/cli/wisent_cli/commands/train_cmd.py +0 -322
- wisent/cli/wisent_cli/main.py +0 -93
- wisent/cli/wisent_cli/shell.py +0 -80
- wisent/cli/wisent_cli/ui.py +0 -69
- wisent/cli/wisent_cli/util/aggregations.py +0 -43
- wisent/cli/wisent_cli/util/parsing.py +0 -126
- wisent/cli/wisent_cli/version.py +0 -4
- wisent/opti/methods/__init__.py +0 -0
- wisent/synthetic/__init__.py +0 -0
- wisent/synthetic/cleaners/__init__.py +0 -0
- wisent/synthetic/cleaners/core/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
- wisent/synthetic/db_instructions/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/__init__.py +0 -0
- wisent/synthetic/generators/__init__.py +0 -0
- wisent/synthetic/generators/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
- wisent-0.5.12.dist-info/RECORD +0 -220
- /wisent/{benchmarks → core/evaluators/benchmark_specific/coding}/__init__.py +0 -0
- /wisent/{benchmarks/coding → core/evaluators/benchmark_specific/coding/metrics}/__init__.py +0 -0
- /wisent/{benchmarks/coding/metrics → core/evaluators/benchmark_specific/coding/metrics/core}/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/core/atoms.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/passk.py +0 -0
- /wisent/{benchmarks/coding/metrics/core → core/evaluators/benchmark_specific/coding/output_sanitizer}/__init__.py +0 -0
- /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/output_sanitizer/core}/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/core/atoms.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/utils.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/__init__.py +0 -0
- /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/providers}/core/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/core/atoms.py +0 -0
- /wisent/{benchmarks/coding/providers/core → core/evaluators/benchmark_specific/coding/safe_docker}/__init__.py +0 -0
- /wisent/{benchmarks/coding/providers/livecodebench → core/evaluators/benchmark_specific/coding/safe_docker/core}/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/atoms.py +0 -0
- /wisent/{benchmarks/coding/safe_docker → core/opti}/__init__.py +0 -0
- /wisent/{benchmarks/coding/safe_docker → core/opti}/core/__init__.py +0 -0
- /wisent/{opti → core/opti}/core/atoms.py +0 -0
- /wisent/{classifiers → core/opti/methods}/__init__.py +0 -0
- /wisent/{opti → core/opti}/methods/opti_classificator.py +0 -0
- /wisent/{opti → core/opti}/methods/opti_steering.py +0 -0
- /wisent/{classifiers/core → core/synthetic}/__init__.py +0 -0
- /wisent/{classifiers/models → core/synthetic/cleaners}/__init__.py +0 -0
- /wisent/{cli → core/synthetic/cleaners/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/cleaners/core/atoms.py +0 -0
- /wisent/{cli/classifiers → core/synthetic/cleaners/methods}/__init__.py +0 -0
- /wisent/{cli/data_loaders → core/synthetic/cleaners/methods/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/cleaners/methods/core/atoms.py +0 -0
- /wisent/{cli/evaluators → core/synthetic/db_instructions}/__init__.py +0 -0
- /wisent/{cli/steering_methods → core/synthetic/db_instructions/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/db_instructions/core/atoms.py +0 -0
- /wisent/{cli/wisent_cli → core/synthetic/generators}/__init__.py +0 -0
- /wisent/{cli/wisent_cli/commands → core/synthetic/generators/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/generators/core/atoms.py +0 -0
- /wisent/{cli/wisent_cli/util → core/synthetic/generators/diversities}/__init__.py +0 -0
- /wisent/{opti → core/synthetic/generators/diversities/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/generators/diversities/core/core.py +0 -0
- /wisent/{opti/core → core/synthetic/generators/diversities/methods}/__init__.py +0 -0
- {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/WHEEL +0 -0
- {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/licenses/LICENSE +0 -0
- {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/top_level.txt +0 -0
wisent/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.5.12"
|
|
1
|
+
__version__ = "0.5.14"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Activation collection and management."""
|
|
2
|
+
|
|
3
|
+
from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
|
|
4
|
+
from wisent.core.activations.core.atoms import (
|
|
5
|
+
ActivationAggregationStrategy,
|
|
6
|
+
LayerActivations,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"ActivationCollector",
|
|
11
|
+
"Activations",
|
|
12
|
+
"PromptConstructionStrategy",
|
|
13
|
+
"ActivationAggregationStrategy",
|
|
14
|
+
"LayerActivations",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def __getattr__(name):
|
|
19
|
+
"""Lazy import to avoid circular dependencies."""
|
|
20
|
+
if name == "ActivationCollector":
|
|
21
|
+
from wisent.core.activations.activations_collector import ActivationCollector
|
|
22
|
+
return ActivationCollector
|
|
23
|
+
if name == "Activations":
|
|
24
|
+
from wisent.core.activations.activations import Activations
|
|
25
|
+
return Activations
|
|
26
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Activation wrapper for classifier feature extraction."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
import torch
|
|
5
|
+
from wisent.core.activations.core.atoms import ActivationAggregationStrategy
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Activations:
|
|
9
|
+
"""Wrapper for activation tensors with aggregation strategy.
|
|
10
|
+
|
|
11
|
+
This class wraps activation tensors and provides methods to extract
|
|
12
|
+
features for classifier input based on the specified aggregation strategy.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
def __init__(self, tensor: torch.Tensor, layer: Any, aggregation_strategy):
|
|
16
|
+
"""Initialize Activations wrapper.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
tensor: Activation tensor (typically shape [batch, seq_len, hidden_dim])
|
|
20
|
+
layer: Layer object containing layer metadata
|
|
21
|
+
aggregation_strategy: Strategy for aggregating tokens (string or ActivationAggregationStrategy enum)
|
|
22
|
+
"""
|
|
23
|
+
self.tensor = tensor
|
|
24
|
+
self.layer = layer
|
|
25
|
+
|
|
26
|
+
# Convert string to enum if needed
|
|
27
|
+
if isinstance(aggregation_strategy, str):
|
|
28
|
+
# Map common string values to enum
|
|
29
|
+
strategy_map = {
|
|
30
|
+
"average": ActivationAggregationStrategy.MEAN_POOLING,
|
|
31
|
+
"mean": ActivationAggregationStrategy.MEAN_POOLING,
|
|
32
|
+
"final": ActivationAggregationStrategy.LAST_TOKEN,
|
|
33
|
+
"last": ActivationAggregationStrategy.LAST_TOKEN,
|
|
34
|
+
"first": ActivationAggregationStrategy.FIRST_TOKEN,
|
|
35
|
+
"max": ActivationAggregationStrategy.MAX_POOLING,
|
|
36
|
+
"mean_pooling": ActivationAggregationStrategy.MEAN_POOLING,
|
|
37
|
+
"last_token": ActivationAggregationStrategy.LAST_TOKEN,
|
|
38
|
+
"first_token": ActivationAggregationStrategy.FIRST_TOKEN,
|
|
39
|
+
"max_pooling": ActivationAggregationStrategy.MAX_POOLING,
|
|
40
|
+
}
|
|
41
|
+
self.aggregation_strategy = strategy_map.get(
|
|
42
|
+
aggregation_strategy.lower(),
|
|
43
|
+
ActivationAggregationStrategy.MEAN_POOLING
|
|
44
|
+
)
|
|
45
|
+
else:
|
|
46
|
+
self.aggregation_strategy = aggregation_strategy
|
|
47
|
+
|
|
48
|
+
def extract_features_for_classifier(self) -> torch.Tensor:
|
|
49
|
+
"""Extract features from activations for classifier input.
|
|
50
|
+
|
|
51
|
+
Aggregates the activation tensor based on the specified strategy
|
|
52
|
+
to produce a single feature vector suitable for classifier input.
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
torch.Tensor: Aggregated features (typically shape [hidden_dim])
|
|
56
|
+
"""
|
|
57
|
+
if self.tensor is None:
|
|
58
|
+
raise ValueError("Cannot extract features from None tensor")
|
|
59
|
+
|
|
60
|
+
# Ensure tensor is 3D: [batch, seq_len, hidden_dim]
|
|
61
|
+
if len(self.tensor.shape) == 2:
|
|
62
|
+
# If [seq_len, hidden_dim], add batch dimension
|
|
63
|
+
tensor = self.tensor.unsqueeze(0)
|
|
64
|
+
else:
|
|
65
|
+
tensor = self.tensor
|
|
66
|
+
|
|
67
|
+
# Apply aggregation strategy
|
|
68
|
+
if self.aggregation_strategy == ActivationAggregationStrategy.MEAN_POOLING:
|
|
69
|
+
# Average over sequence length dimension
|
|
70
|
+
features = tensor.mean(dim=1).squeeze(0)
|
|
71
|
+
elif self.aggregation_strategy == ActivationAggregationStrategy.LAST_TOKEN:
|
|
72
|
+
# Take last token
|
|
73
|
+
features = tensor[:, -1, :].squeeze(0)
|
|
74
|
+
elif self.aggregation_strategy == ActivationAggregationStrategy.FIRST_TOKEN:
|
|
75
|
+
# Take first token
|
|
76
|
+
features = tensor[:, 0, :].squeeze(0)
|
|
77
|
+
elif self.aggregation_strategy == ActivationAggregationStrategy.MAX_POOLING:
|
|
78
|
+
# Max over sequence length dimension
|
|
79
|
+
features = tensor.max(dim=1)[0].squeeze(0)
|
|
80
|
+
else:
|
|
81
|
+
# Default to mean pooling
|
|
82
|
+
features = tensor.mean(dim=1).squeeze(0)
|
|
83
|
+
|
|
84
|
+
return features
|
|
85
|
+
|
|
86
|
+
def cpu(self):
|
|
87
|
+
"""Move tensor to CPU."""
|
|
88
|
+
if self.tensor is not None:
|
|
89
|
+
self.tensor = self.tensor.cpu()
|
|
90
|
+
return self
|
|
91
|
+
|
|
92
|
+
def detach(self):
|
|
93
|
+
"""Detach tensor from computation graph."""
|
|
94
|
+
if self.tensor is not None:
|
|
95
|
+
self.tensor = self.tensor.detach()
|
|
96
|
+
return self
|
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from typing import Sequence
|
|
3
|
+
from typing import Sequence, TYPE_CHECKING
|
|
4
4
|
import torch
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
8
8
|
from wisent.core.activations.core.atoms import LayerActivations, ActivationAggregationStrategy, LayerName, RawActivationMap
|
|
9
|
-
from wisent.core.
|
|
9
|
+
from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from wisent.core.models.wisent_model import WisentModel
|
|
13
|
+
|
|
10
14
|
__all__ = ["ActivationCollector"]
|
|
11
15
|
|
|
12
16
|
@dataclass(slots=True)
|
|
@@ -125,22 +129,23 @@ class ActivationCollector:
|
|
|
125
129
|
}
|
|
126
130
|
"""
|
|
127
131
|
|
|
128
|
-
model: WisentModel
|
|
132
|
+
model: "WisentModel"
|
|
129
133
|
store_device: str | torch.device = "cpu"
|
|
130
134
|
dtype: torch.dtype | None = None
|
|
131
135
|
|
|
132
136
|
def collect_for_pair(
|
|
133
137
|
self,
|
|
134
138
|
pair: ContrastivePair,
|
|
135
|
-
layers: Sequence[LayerName] | None = None,
|
|
139
|
+
layers: Sequence[LayerName] | None = None,
|
|
136
140
|
aggregation: ActivationAggregationStrategy = ActivationAggregationStrategy.CONTINUATION_TOKEN,
|
|
137
141
|
return_full_sequence: bool = False,
|
|
138
142
|
normalize_layers: bool = False,
|
|
143
|
+
prompt_strategy: PromptConstructionStrategy = PromptConstructionStrategy.CHAT_TEMPLATE,
|
|
139
144
|
) -> ContrastivePair:
|
|
140
145
|
pos = self._collect_for_texts(pair.prompt, _resp_text(pair.positive_response),
|
|
141
|
-
layers, aggregation, return_full_sequence, normalize_layers)
|
|
146
|
+
layers, aggregation, return_full_sequence, normalize_layers, prompt_strategy)
|
|
142
147
|
neg = self._collect_for_texts(pair.prompt, _resp_text(pair.negative_response),
|
|
143
|
-
layers, aggregation, return_full_sequence, normalize_layers)
|
|
148
|
+
layers, aggregation, return_full_sequence, normalize_layers, prompt_strategy)
|
|
144
149
|
return pair.with_activations(positive=pos, negative=neg)
|
|
145
150
|
|
|
146
151
|
def _collect_for_texts(
|
|
@@ -151,25 +156,16 @@ class ActivationCollector:
|
|
|
151
156
|
aggregation: ActivationAggregationStrategy,
|
|
152
157
|
return_full_sequence: bool,
|
|
153
158
|
normalize_layers: bool = False,
|
|
159
|
+
prompt_strategy: PromptConstructionStrategy = PromptConstructionStrategy.CHAT_TEMPLATE,
|
|
154
160
|
) -> LayerActivations:
|
|
155
|
-
|
|
161
|
+
|
|
156
162
|
self._ensure_eval_mode()
|
|
157
163
|
with torch.inference_mode():
|
|
158
164
|
tok = self.model.tokenizer # type: ignore[union-attr]
|
|
159
|
-
if not hasattr(tok, "apply_chat_template"):
|
|
160
|
-
raise RuntimeError("Tokenizer has no apply_chat_template; set it up or use a non-chat path.")
|
|
161
165
|
|
|
162
|
-
# 1) Build
|
|
163
|
-
prompt_text =
|
|
164
|
-
|
|
165
|
-
tokenize=False,
|
|
166
|
-
add_generation_prompt=True,
|
|
167
|
-
)
|
|
168
|
-
full_text = tok.apply_chat_template(
|
|
169
|
-
[{"role": "user", "content": prompt},
|
|
170
|
-
{"role": "assistant", "content": response}],
|
|
171
|
-
tokenize=False,
|
|
172
|
-
add_generation_prompt=False,
|
|
166
|
+
# 1) Build prompts based on strategy
|
|
167
|
+
prompt_text, full_text = self._build_prompts_for_strategy(
|
|
168
|
+
prompt, response, prompt_strategy, tok
|
|
173
169
|
)
|
|
174
170
|
|
|
175
171
|
# 2) Tokenize both with identical flags
|
|
@@ -217,6 +213,61 @@ class ActivationCollector:
|
|
|
217
213
|
activation_aggregation_strategy=None if return_full_sequence else aggregation,
|
|
218
214
|
)
|
|
219
215
|
|
|
216
|
+
def _build_prompts_for_strategy(
|
|
217
|
+
self,
|
|
218
|
+
prompt: str,
|
|
219
|
+
response: str,
|
|
220
|
+
strategy: PromptConstructionStrategy,
|
|
221
|
+
tokenizer
|
|
222
|
+
) -> tuple[str, str]:
|
|
223
|
+
"""
|
|
224
|
+
Build prompt_text and full_text based on the chosen prompt construction strategy.
|
|
225
|
+
|
|
226
|
+
Returns:
|
|
227
|
+
(prompt_text, full_text): Tuple of prompt-only text and prompt+response text
|
|
228
|
+
"""
|
|
229
|
+
if strategy == PromptConstructionStrategy.CHAT_TEMPLATE:
|
|
230
|
+
# Use model's built-in chat template
|
|
231
|
+
if not hasattr(tokenizer, "apply_chat_template"):
|
|
232
|
+
raise RuntimeError("Tokenizer has no apply_chat_template; set it up or use a different strategy.")
|
|
233
|
+
prompt_text = tokenizer.apply_chat_template(
|
|
234
|
+
[{"role": "user", "content": prompt}],
|
|
235
|
+
tokenize=False,
|
|
236
|
+
add_generation_prompt=True,
|
|
237
|
+
)
|
|
238
|
+
full_text = tokenizer.apply_chat_template(
|
|
239
|
+
[{"role": "user", "content": prompt},
|
|
240
|
+
{"role": "assistant", "content": response}],
|
|
241
|
+
tokenize=False,
|
|
242
|
+
add_generation_prompt=False,
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
elif strategy == PromptConstructionStrategy.DIRECT_COMPLETION:
|
|
246
|
+
# Q → good_resp/bad_resp (direct answer)
|
|
247
|
+
prompt_text = prompt
|
|
248
|
+
full_text = f"{prompt} {response}"
|
|
249
|
+
|
|
250
|
+
elif strategy == PromptConstructionStrategy.INSTRUCTION_FOLLOWING:
|
|
251
|
+
# [INST] Q [/INST] → good_resp/bad_resp (instruction format)
|
|
252
|
+
prompt_text = f"[INST] {prompt} [/INST]"
|
|
253
|
+
full_text = f"[INST] {prompt} [/INST] {response}"
|
|
254
|
+
|
|
255
|
+
elif strategy == PromptConstructionStrategy.MULTIPLE_CHOICE:
|
|
256
|
+
# Which is better: Q A. bad B. good → "A"/"B" (choice format)
|
|
257
|
+
# For multiple choice, we expect response to be "A" or "B"
|
|
258
|
+
prompt_text = f"Which is better: {prompt} A. [bad response] B. [good response]\nAnswer:"
|
|
259
|
+
full_text = f"{prompt_text} {response}"
|
|
260
|
+
|
|
261
|
+
elif strategy == PromptConstructionStrategy.ROLE_PLAYING:
|
|
262
|
+
# Behave like person who would answer Q with good_resp → "I" (role assumption)
|
|
263
|
+
prompt_text = f"Behave like a person who would answer '{prompt}' with '{response}'. Say 'I' to confirm:"
|
|
264
|
+
full_text = f"{prompt_text} I"
|
|
265
|
+
|
|
266
|
+
else:
|
|
267
|
+
raise ValueError(f"Unknown prompt construction strategy: {strategy}")
|
|
268
|
+
|
|
269
|
+
return prompt_text, full_text
|
|
270
|
+
|
|
220
271
|
def _select_indices(self, layer_names: Sequence[str] | None, n_blocks: int) -> list[int]:
|
|
221
272
|
"""Map layer names '1'..'L' -> indices 0..L-1."""
|
|
222
273
|
if not layer_names:
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Prompt construction strategies for activation collection."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class PromptConstructionStrategy(Enum):
|
|
7
|
+
"""
|
|
8
|
+
Strategies for constructing prompts from question-answer pairs.
|
|
9
|
+
|
|
10
|
+
These strategies determine how the prompt and response are formatted
|
|
11
|
+
before being passed to the model for activation extraction.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
MULTIPLE_CHOICE = "multiple_choice"
|
|
15
|
+
"""
|
|
16
|
+
Format: Which is better: Q A. bad B. good → "A"/"B" (choice format)
|
|
17
|
+
Example: "Which is better: What is 2+2? A. 5 B. 4"
|
|
18
|
+
Response: "A" or "B"
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
ROLE_PLAYING = "role_playing"
|
|
22
|
+
"""
|
|
23
|
+
Format: Behave like person who would answer Q with good_resp → "I" (role assumption)
|
|
24
|
+
Example: "Behave like a person who would answer 'What is 2+2?' with '4'"
|
|
25
|
+
Response: "I"
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
DIRECT_COMPLETION = "direct_completion"
|
|
29
|
+
"""
|
|
30
|
+
Format: Q → good_resp/bad_resp (direct answer)
|
|
31
|
+
Example: "What is 2+2?"
|
|
32
|
+
Response: "4" or "5"
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
INSTRUCTION_FOLLOWING = "instruction_following"
|
|
36
|
+
"""
|
|
37
|
+
Format: [INST] Q [/INST] → good_resp/bad_resp (instruction format)
|
|
38
|
+
Example: "[INST] What is 2+2? [/INST]"
|
|
39
|
+
Response: "4" or "5"
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
CHAT_TEMPLATE = "chat_template"
|
|
43
|
+
"""
|
|
44
|
+
Format: Uses the model's built-in chat template
|
|
45
|
+
Example: <|start_header_id|>user<|end_header_id|>What is 2+2?<|eot_id|>
|
|
46
|
+
Response: Model's chat-formatted response
|
|
47
|
+
"""
|
wisent/core/agent/__init__.py
CHANGED
|
@@ -1,18 +1 @@
|
|
|
1
|
-
|
|
2
|
-
Agent module for wisent-guard autonomous systems.
|
|
3
|
-
|
|
4
|
-
This module provides:
|
|
5
|
-
- ResponseDiagnostics: Response analysis and quality assessment
|
|
6
|
-
- ResponseSteering: Response improvement and steering
|
|
7
|
-
- Data classes for analysis and improvement results
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
from .diagnose import ResponseDiagnostics, AnalysisResult
|
|
11
|
-
from .steer import ResponseSteering, ImprovementResult
|
|
12
|
-
|
|
13
|
-
__all__ = [
|
|
14
|
-
'ResponseDiagnostics',
|
|
15
|
-
'AnalysisResult',
|
|
16
|
-
'ResponseSteering',
|
|
17
|
-
'ImprovementResult'
|
|
18
|
-
]
|
|
1
|
+
# Empty __init__.py to avoid cascading import errors with empty __init__ pattern
|
wisent/core/agent/budget.py
CHANGED
|
@@ -276,7 +276,7 @@ class BudgetManager:
|
|
|
276
276
|
return estimate_task_time("benchmark_eval", 100)
|
|
277
277
|
|
|
278
278
|
except Exception as e:
|
|
279
|
-
raise RuntimeError(f"Device benchmark estimate failed for task '{task_name}': {e}. Run device benchmark first with: python -m
|
|
279
|
+
raise RuntimeError(f"Device benchmark estimate failed for task '{task_name}': {e}. Run device benchmark first with: python -m wisent.core.agent.budget benchmark")
|
|
280
280
|
|
|
281
281
|
elif resource_type == ResourceType.MEMORY:
|
|
282
282
|
raise RuntimeError(f"Memory estimation not implemented for task '{task_name}'")
|
|
@@ -348,7 +348,7 @@ def calculate_max_tasks_for_time_budget(task_type: str = "benchmark_evaluation",
|
|
|
348
348
|
return max_tasks
|
|
349
349
|
|
|
350
350
|
except Exception as e:
|
|
351
|
-
raise RuntimeError(f"Budget calculation failed for task '{task_type}': {e}. Run device benchmark first with: python -m
|
|
351
|
+
raise RuntimeError(f"Budget calculation failed for task '{task_type}': {e}. Run device benchmark first with: python -m wisent.core.agent.budget benchmark")
|
|
352
352
|
|
|
353
353
|
|
|
354
354
|
def optimize_tasks_for_budget(task_candidates: List[str],
|
|
@@ -629,7 +629,7 @@ except Exception as e:
|
|
|
629
629
|
"""
|
|
630
630
|
benchmark = self.get_current_benchmark()
|
|
631
631
|
if not benchmark:
|
|
632
|
-
raise RuntimeError(f"No benchmark available for device. Run benchmark first with: python -m
|
|
632
|
+
raise RuntimeError(f"No benchmark available for device. Run benchmark first with: python -m wisent.core.agent.budget benchmark")
|
|
633
633
|
else:
|
|
634
634
|
# Use actual benchmark results
|
|
635
635
|
if task_type == "model_loading":
|
wisent/core/agent/diagnose/__init__.py
CHANGED
|
@@ -1,55 +1 @@
|
|
|
1
|
-
|
|
2
|
-
Diagnostic module for autonomous agent.
|
|
3
|
-
|
|
4
|
-
This module provides:
|
|
5
|
-
- Classifier selection and auto-discovery
|
|
6
|
-
- On-the-fly classifier creation
|
|
7
|
-
- Response analysis and quality assessment
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
# Response diagnostics
|
|
11
|
-
from .response_diagnostics import ResponseDiagnostics, AnalysisResult
|
|
12
|
-
|
|
13
|
-
# Classifier management
|
|
14
|
-
from .select_classifiers import ClassifierSelector, ClassifierInfo, SelectionCriteria, auto_select_classifiers_for_agent
|
|
15
|
-
from .create_classifier import ClassifierCreator, TrainingConfig, TrainingResult, create_classifier_on_demand
|
|
16
|
-
|
|
17
|
-
# New marketplace system
|
|
18
|
-
from .classifier_marketplace import (
|
|
19
|
-
ClassifierMarketplace,
|
|
20
|
-
ClassifierListing,
|
|
21
|
-
ClassifierCreationEstimate
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
# Agent decision system
|
|
25
|
-
from .agent_classifier_decision import (
|
|
26
|
-
AgentClassifierDecisionSystem,
|
|
27
|
-
TaskAnalysis,
|
|
28
|
-
ClassifierDecision
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
__all__ = [
|
|
32
|
-
# Response diagnostics
|
|
33
|
-
'ResponseDiagnostics',
|
|
34
|
-
'AnalysisResult',
|
|
35
|
-
|
|
36
|
-
# Legacy classifier management (for backward compatibility)
|
|
37
|
-
'ClassifierSelector',
|
|
38
|
-
'ClassifierInfo',
|
|
39
|
-
'SelectionCriteria',
|
|
40
|
-
'auto_select_classifiers_for_agent',
|
|
41
|
-
'ClassifierCreator',
|
|
42
|
-
'TrainingConfig',
|
|
43
|
-
'TrainingResult',
|
|
44
|
-
'create_classifier_on_demand',
|
|
45
|
-
|
|
46
|
-
# New marketplace system
|
|
47
|
-
'ClassifierMarketplace',
|
|
48
|
-
'ClassifierListing',
|
|
49
|
-
'ClassifierCreationEstimate',
|
|
50
|
-
|
|
51
|
-
# Agent decision system
|
|
52
|
-
'AgentClassifierDecisionSystem',
|
|
53
|
-
'TaskAnalysis',
|
|
54
|
-
'ClassifierDecision'
|
|
55
|
-
]
|
|
1
|
+
# Empty __init__.py to avoid cascading import errors with empty __init__ pattern
|
wisent/core/agent/diagnose/classifier_marketplace.py
CHANGED
|
@@ -53,9 +53,9 @@ class ClassifierMarketplace:
|
|
|
53
53
|
self.search_paths = search_paths or [
|
|
54
54
|
"./models/",
|
|
55
55
|
"./classifiers/",
|
|
56
|
-
"./
|
|
57
|
-
"./
|
|
58
|
-
"./
|
|
56
|
+
"./wisent/models/",
|
|
57
|
+
"./wisent/classifiers/",
|
|
58
|
+
"./wisent/core/classifiers/"
|
|
59
59
|
]
|
|
60
60
|
self.available_classifiers: List[ClassifierListing] = []
|
|
61
61
|
self._training_time_cache = {}
|
|
@@ -75,8 +75,8 @@ class ClassifierMarketplace:
|
|
|
75
75
|
if not os.path.exists(search_path):
|
|
76
76
|
continue
|
|
77
77
|
|
|
78
|
-
# For
|
|
79
|
-
if "
|
|
78
|
+
# For wisent/core/classifiers, search recursively for the nested structure
|
|
79
|
+
if "wisent/core/classifiers" in search_path:
|
|
80
80
|
import glob
|
|
81
81
|
pattern = os.path.join(search_path, "**", "*.pkl")
|
|
82
82
|
classifier_files = glob.glob(pattern, recursive=True)
|
|
@@ -163,9 +163,9 @@ class ClassifierMarketplace:
|
|
|
163
163
|
"""Parse layer and issue type from filename."""
|
|
164
164
|
filename = os.path.basename(filepath).lower()
|
|
165
165
|
|
|
166
|
-
# Check if this is from
|
|
167
|
-
if "
|
|
168
|
-
# Extract from path structure:
|
|
166
|
+
# Check if this is from wisent/core/classifiers with nested structure
|
|
167
|
+
if "wisent/core/classifiers" in filepath:
|
|
168
|
+
# Extract from path structure: wisent/core/classifiers/{model}/{benchmark}/layer_{layer}.pkl
|
|
169
169
|
path_parts = filepath.split(os.sep)
|
|
170
170
|
|
|
171
171
|
# Find the benchmark name (second to last directory)
|
wisent/core/agent/diagnose/response_diagnostics.py
CHANGED
|
@@ -11,11 +11,11 @@ This module handles:
|
|
|
11
11
|
from dataclasses import dataclass
|
|
12
12
|
from typing import Any, Dict, List
|
|
13
13
|
|
|
14
|
-
from wisent.core.activations import ActivationAggregationStrategy
|
|
14
|
+
from wisent.core.activations.core.atoms import ActivationAggregationStrategy
|
|
15
|
+
from wisent.core.activations.activations import Activations
|
|
15
16
|
from wisent.core.classifier.classifier import Classifier
|
|
16
|
-
|
|
17
|
-
from
|
|
18
|
-
from ...model import Model
|
|
17
|
+
from wisent.core.layer import Layer
|
|
18
|
+
from wisent.core.model import Model
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
@dataclass
|
wisent/core/agent/diagnose/synthetic_classifier_option.py
CHANGED
|
@@ -193,7 +193,7 @@ class SyntheticClassifierFactory:
|
|
|
193
193
|
logging.info("Starting classifier training...")
|
|
194
194
|
try:
|
|
195
195
|
# Convert activations to the format expected by train_on_activations method
|
|
196
|
-
from wisent.core.activations import Activations
|
|
196
|
+
from wisent.core.activations.activations import Activations
|
|
197
197
|
|
|
198
198
|
# Convert torch tensors to Activations objects if needed
|
|
199
199
|
harmful_activations = []
|
wisent/core/agent/diagnose/tasks/task_manager.py
CHANGED
|
@@ -331,7 +331,7 @@ def handle_configurable_group_task(task_name: str):
|
|
|
331
331
|
# Look for existing YAML files in common directories
|
|
332
332
|
yaml_candidates = []
|
|
333
333
|
search_dirs = [
|
|
334
|
-
"
|
|
334
|
+
"wisent/parameters/tasks",
|
|
335
335
|
".",
|
|
336
336
|
"tasks",
|
|
337
337
|
"configs"
|
|
@@ -891,7 +891,7 @@ def save_custom_task_yaml(task_name: str, yaml_content: str) -> Optional[str]:
|
|
|
891
891
|
"""
|
|
892
892
|
try:
|
|
893
893
|
# Create the tasks directory if it doesn't exist
|
|
894
|
-
tasks_dir = os.path.join("
|
|
894
|
+
tasks_dir = os.path.join("wisent", "parameters", "tasks")
|
|
895
895
|
os.makedirs(tasks_dir, exist_ok=True)
|
|
896
896
|
|
|
897
897
|
# Save the YAML content to a file
|
|
@@ -993,7 +993,7 @@ def create_flan_held_in_files() -> Optional[str]:
|
|
|
993
993
|
"""
|
|
994
994
|
try:
|
|
995
995
|
# Create the tasks directory
|
|
996
|
-
tasks_dir = os.path.join("
|
|
996
|
+
tasks_dir = os.path.join("wisent", "parameters", "tasks")
|
|
997
997
|
os.makedirs(tasks_dir, exist_ok=True)
|
|
998
998
|
|
|
999
999
|
# Create the template file first
|
wisent/core/agent/diagnose.py
CHANGED
|
@@ -11,7 +11,8 @@ This module handles:
|
|
|
11
11
|
from dataclasses import dataclass
|
|
12
12
|
from typing import Any, Dict, List
|
|
13
13
|
|
|
14
|
-
from wisent.core.activations import ActivationAggregationStrategy
|
|
14
|
+
from wisent.core.activations.core.atoms import ActivationAggregationStrategy
|
|
15
|
+
from wisent.core.activations.activations import Activations
|
|
15
16
|
from wisent.core.classifier.classifier import Classifier
|
|
16
17
|
|
|
17
18
|
from ..layer import Layer
|
wisent/core/autonomous_agent.py
CHANGED
|
@@ -12,7 +12,8 @@ A model that can autonomously use wisent-guard capabilities on itself:
|
|
|
12
12
|
import asyncio
|
|
13
13
|
from typing import Any, Dict, List, Optional
|
|
14
14
|
|
|
15
|
-
from wisent.core.activations import ActivationAggregationStrategy
|
|
15
|
+
from wisent.core.activations.core.atoms import ActivationAggregationStrategy
|
|
16
|
+
from wisent.core.activations.activations import Activations
|
|
16
17
|
|
|
17
18
|
from .agent.diagnose import AgentClassifierDecisionSystem, AnalysisResult, ClassifierMarketplace, ResponseDiagnostics
|
|
18
19
|
from .agent.steer import ImprovementResult, ResponseSteering
|
|
@@ -768,9 +769,16 @@ class AutonomousAgent:
|
|
|
768
769
|
if not classifier_config:
|
|
769
770
|
return None
|
|
770
771
|
|
|
772
|
+
# Validate required classifier configuration
|
|
773
|
+
if "layer" not in classifier_config:
|
|
774
|
+
raise ValueError(
|
|
775
|
+
"Classifier configuration must include 'layer' parameter. "
|
|
776
|
+
"Please ensure your classifier configuration file specifies the optimal layer."
|
|
777
|
+
)
|
|
778
|
+
|
|
771
779
|
# Create ClassifierParams from stored data
|
|
772
780
|
params = ClassifierParams(
|
|
773
|
-
optimal_layer=classifier_config
|
|
781
|
+
optimal_layer=classifier_config["layer"],
|
|
774
782
|
classification_threshold=classifier_config.get("threshold", 0.5),
|
|
775
783
|
training_samples=classifier_config.get("samples", 25),
|
|
776
784
|
classifier_type=classifier_config.get("type", "logistic"),
|