wisent 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic. Click here for more details.
- wisent/__init__.py +1 -1
- wisent/benchmarks/coding/metrics/evaluator.py +16 -16
- wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +2 -2
- wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +2 -2
- wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +2 -2
- wisent/benchmarks/coding/safe_docker/core/runtime.py +4 -4
- wisent/benchmarks/coding/safe_docker/entrypoint.py +1 -1
- wisent/benchmarks/coding/safe_docker/recipes.py +1 -1
- wisent/classifiers/models/logistic.py +1 -1
- wisent/classifiers/models/mlp.py +1 -1
- wisent/cli/classifiers/classifier_rotator.py +1 -1
- wisent/cli/data_loaders/data_loader_rotator.py +1 -1
- wisent/cli/evaluators/evaluator_rotator.py +1 -1
- wisent/cli/steering_methods/steering_rotator.py +3 -3
- wisent/cli/wisent_cli/commands/help_cmd.py +1 -1
- wisent/cli/wisent_cli/commands/listing.py +7 -7
- wisent/cli/wisent_cli/commands/train_cmd.py +15 -15
- wisent/cli/wisent_cli/main.py +7 -7
- wisent/cli/wisent_cli/shell.py +2 -2
- wisent/cli/wisent_cli/util/aggregations.py +1 -1
- wisent/core/__init__.py +0 -15
- wisent/core/activations/activations_collector.py +5 -5
- wisent/core/agent/device_benchmarks.py +9 -9
- wisent/core/agent/diagnose/classifier_marketplace.py +1 -1
- wisent/core/agent/diagnose/create_classifier.py +1 -1
- wisent/core/agent/diagnose/response_diagnostics.py +2 -2
- wisent/core/agent/diagnose/synthetic_classifier_option.py +6 -6
- wisent/core/agent/diagnose/test_synthetic_classifier.py +3 -3
- wisent/core/agent/diagnose.py +2 -2
- wisent/core/autonomous_agent.py +1 -1
- wisent/core/contrastive_pairs/core/atoms.py +1 -1
- wisent/core/contrastive_pairs/core/buliders.py +3 -3
- wisent/core/contrastive_pairs/core/pair.py +4 -4
- wisent/core/contrastive_pairs/core/response.py +2 -2
- wisent/core/contrastive_pairs/core/serialization.py +2 -2
- wisent/core/contrastive_pairs/core/set.py +3 -3
- wisent/core/contrastive_pairs/diagnostics/control_vectors.py +1 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +1 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +4 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +4 -4
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +3 -3
- wisent/core/data_loaders/core/atoms.py +1 -1
- wisent/core/data_loaders/loaders/custom.py +3 -3
- wisent/core/data_loaders/loaders/lm_loader.py +4 -4
- wisent/core/download_full_benchmarks.py +2 -2
- wisent/core/evaluators/oracles/interactive.py +1 -1
- wisent/core/evaluators/oracles/nlp_evaluator.py +1 -1
- wisent/core/evaluators/oracles/user_specified.py +1 -1
- wisent/core/lm_eval_harness_ground_truth.py +2 -2
- wisent/core/log_likelihoods_evaluator.py +4 -4
- wisent/core/models/core/atoms.py +1 -1
- wisent/core/models/wisent_model.py +5 -5
- wisent/core/optuna/__init__.py +3 -3
- wisent/core/optuna/classifier/activation_generator.py +2 -2
- wisent/core/optuna/classifier/classifier_cache.py +1 -1
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
- wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +3 -3
- wisent/core/optuna/steering/data_utils.py +3 -3
- wisent/core/optuna/steering/metrics.py +5 -5
- wisent/core/optuna/steering/optuna_pipeline.py +8 -8
- wisent/core/optuna/steering/steering_optimization.py +9 -9
- wisent/core/prompts/core/prompt_formater.py +1 -1
- wisent/core/prompts/prompt_stratiegies/direct_completion.py +1 -1
- wisent/core/prompts/prompt_stratiegies/instruction_following.py +1 -1
- wisent/core/prompts/prompt_stratiegies/multiple_choice.py +1 -1
- wisent/core/prompts/prompt_stratiegies/role_playing.py +1 -1
- wisent/core/sample_size_optimizer.py +2 -2
- wisent/core/steering.py +2 -2
- wisent/core/steering_methods/core/atoms.py +2 -2
- wisent/core/steering_methods/methods/caa.py +1 -1
- wisent/core/steering_optimizer.py +2 -2
- wisent/core/tracking/memory.py +1 -1
- wisent/core/trainers/core/atoms.py +2 -2
- wisent/core/trainers/steering_trainer.py +7 -7
- wisent/opti/methods/opti_classificator.py +5 -5
- wisent/opti/methods/opti_steering.py +5 -5
- wisent/synthetic/cleaners/core/atoms.py +1 -1
- wisent/synthetic/cleaners/deduper_cleaner.py +4 -4
- wisent/synthetic/cleaners/methods/base_dedupers.py +2 -2
- wisent/synthetic/cleaners/methods/base_refusalers.py +2 -2
- wisent/synthetic/cleaners/methods/core/atoms.py +2 -2
- wisent/synthetic/cleaners/pairs_cleaner.py +10 -10
- wisent/synthetic/cleaners/refusaler_cleaner.py +12 -12
- wisent/synthetic/db_instructions/mini_dp.py +1 -1
- wisent/synthetic/generators/diversities/methods/fast_diversity.py +1 -1
- wisent/synthetic/generators/pairs_generator.py +8 -8
- {wisent-0.5.1.dist-info → wisent-0.5.3.dist-info}/METADATA +1 -1
- {wisent-0.5.1.dist-info → wisent-0.5.3.dist-info}/RECORD +91 -91
- {wisent-0.5.1.dist-info → wisent-0.5.3.dist-info}/WHEEL +0 -0
- {wisent-0.5.1.dist-info → wisent-0.5.3.dist-info}/licenses/LICENSE +0 -0
- {wisent-0.5.1.dist-info → wisent-0.5.3.dist-info}/top_level.txt +0 -0
wisent/core/autonomous_agent.py
CHANGED
|
@@ -12,7 +12,7 @@ A model that can autonomously use wisent-guard capabilities on itself:
|
|
|
12
12
|
import asyncio
|
|
13
13
|
from typing import Any, Dict, List, Optional
|
|
14
14
|
|
|
15
|
-
from
|
|
15
|
+
from wisent.core.activations import ActivationAggregationStrategy, Activations
|
|
16
16
|
|
|
17
17
|
from .agent.diagnose import AgentClassifierDecisionSystem, AnalysisResult, ClassifierMarketplace, ResponseDiagnostics
|
|
18
18
|
from .agent.steer import ImprovementResult, ResponseSteering
|
|
@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
|
|
|
5
5
|
from typing import Iterable, TYPE_CHECKING
|
|
6
6
|
|
|
7
7
|
if TYPE_CHECKING:
|
|
8
|
-
from
|
|
8
|
+
from wisent.core.activations.core.atoms import LayerActivations
|
|
9
9
|
|
|
10
10
|
__all__ = [
|
|
11
11
|
"AtomResponse",
|
|
@@ -4,9 +4,9 @@ import logging
|
|
|
4
4
|
from typing import Iterable
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
from
|
|
7
|
+
from wisent.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
|
|
8
|
+
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
9
|
+
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
10
10
|
|
|
11
11
|
__all__ = [
|
|
12
12
|
"from_phrase_pairs",
|
|
@@ -2,13 +2,13 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass, replace
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
5
|
+
from wisent.core.contrastive_pairs.core.atoms import AtomContrastivePair
|
|
6
|
+
from wisent.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
|
|
7
7
|
|
|
8
8
|
from typing import TYPE_CHECKING
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
|
-
from
|
|
11
|
+
from wisent.core.activations.core.atoms import LayerActivations, RawActivationMap
|
|
12
12
|
|
|
13
13
|
__all__ = [
|
|
14
14
|
"ContrastivePair",
|
|
@@ -167,7 +167,7 @@ class ContrastivePair(AtomContrastivePair):
|
|
|
167
167
|
)
|
|
168
168
|
'''
|
|
169
169
|
|
|
170
|
-
from
|
|
170
|
+
from wisent.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
|
|
171
171
|
|
|
172
172
|
return cls(
|
|
173
173
|
prompt=str(data["prompt"]),
|
|
@@ -2,8 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass, replace
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
5
|
+
from wisent.core.contrastive_pairs.core.atoms import AtomResponse
|
|
6
|
+
from wisent.core.activations.core.atoms import LayerActivations, RawActivationMap
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
9
|
"Response",
|
|
@@ -9,8 +9,8 @@ from pathlib import Path
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
import torch
|
|
11
11
|
|
|
12
|
-
from
|
|
13
|
-
from
|
|
12
|
+
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
13
|
+
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
16
16
|
"save_contrastive_pair_set",
|
|
@@ -7,10 +7,10 @@ from dataclasses import dataclass, field
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import TYPE_CHECKING
|
|
9
9
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
10
|
+
from wisent.core.contrastive_pairs.core.atoms import AtomContrastivePairSet
|
|
11
|
+
from wisent.core.contrastive_pairs.diagnostics import DiagnosticsConfig, DiagnosticsReport, run_all_diagnostics
|
|
12
12
|
|
|
13
|
-
from
|
|
13
|
+
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
16
16
|
"ContrastivePairSet",
|
|
@@ -8,7 +8,7 @@ from typing import Mapping
|
|
|
8
8
|
|
|
9
9
|
import torch
|
|
10
10
|
|
|
11
|
-
from
|
|
11
|
+
from wisent.core.activations.core.atoms import LayerActivations, RawActivationMap
|
|
12
12
|
|
|
13
13
|
from .base import DiagnosticsIssue, DiagnosticsReport, MetricReport
|
|
14
14
|
|
|
@@ -4,12 +4,12 @@ from typing import Type, Union
|
|
|
4
4
|
import importlib
|
|
5
5
|
import logging
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from wisent.core.contrastive_pairs.lm_eval_pairs.atoms import (
|
|
8
8
|
LMEvalBenchmarkExtractor,
|
|
9
9
|
UnsupportedLMEvalBenchmarkError,
|
|
10
10
|
)
|
|
11
11
|
|
|
12
|
-
from
|
|
12
|
+
from wisent.core.contrastive_pairs.lm_eval_pairs.lm_extractor_manifest import EXTRACTORS as _MANIFEST
|
|
13
13
|
|
|
14
14
|
__all__ = [
|
|
15
15
|
"register_extractor",
|
|
@@ -37,8 +37,8 @@ def register_extractor(name: str, ref: Union[str, Type[LMEvalBenchmarkExtractor]
|
|
|
37
37
|
If the ref class does not subclass LMEvalBenchmarkExtractor.
|
|
38
38
|
|
|
39
39
|
example:
|
|
40
|
-
>>> from
|
|
41
|
-
>>> from
|
|
40
|
+
>>> from wisent.core.contrastive_pairs.lm_eval_pairs.lm_extractor_registry import register_extractor
|
|
41
|
+
>>> from wisent.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor
|
|
42
42
|
>>> class MyExtractor(LMEvalBenchmarkExtractor): ...
|
|
43
43
|
>>> register_extractor("mytask", MyExtractor)
|
|
44
44
|
>>> register_extractor("mytask2", "my_module:MyExtractor")
|
|
@@ -2,10 +2,10 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import Any, TYPE_CHECKING
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
5
|
+
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
6
|
+
from wisent.core.contrastive_pairs.core.response import NegativeResponse, PositiveResponse
|
|
7
|
+
from wisent.core.contrastive_pairs.lm_eval_pairs.atoms import LMEvalBenchmarkExtractor
|
|
8
|
+
from wisent.cli.cli_logger import setup_logger, bind
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from lm_eval.api.task import ConfigurableTask
|
|
@@ -2,12 +2,12 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
5
|
+
from wisent.core.contrastive_pairs.lm_eval_pairs.lm_extractor_registry import get_extractor
|
|
6
|
+
from wisent.cli.cli_logger import setup_logger, bind
|
|
7
7
|
|
|
8
8
|
if TYPE_CHECKING:
|
|
9
9
|
from lm_eval.api.task import ConfigurableTask
|
|
10
|
-
from
|
|
10
|
+
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
11
11
|
|
|
12
12
|
__all__ = ["build_contrastive_pairs"]
|
|
13
13
|
_LOG = setup_logger(__name__)
|
|
@@ -6,7 +6,7 @@ from typing import Any, Dict, Type
|
|
|
6
6
|
|
|
7
7
|
from typing import TypedDict, Mapping
|
|
8
8
|
from lm_eval.api.task import ConfigurableTask
|
|
9
|
-
from
|
|
9
|
+
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
10
10
|
|
|
11
11
|
__all__ = ["DataLoaderError", "BaseDataLoader"]
|
|
12
12
|
|
|
@@ -2,9 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
from typing import Any, Iterable
|
|
3
3
|
import logging
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
5
|
+
from wisent.core.data_loaders.core.atoms import BaseDataLoader, DataLoaderError, LoadDataResult
|
|
6
|
+
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
7
|
+
from wisent.core.contrastive_pairs.core.serialization import load_contrastive_pair_set
|
|
8
8
|
|
|
9
9
|
__all__ = [
|
|
10
10
|
"CustomUserDataLoader",
|
|
@@ -2,12 +2,12 @@ from __future__ import annotations
|
|
|
2
2
|
from typing import Any, TYPE_CHECKING
|
|
3
3
|
import logging
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
5
|
+
from wisent.core.data_loaders.core.atoms import BaseDataLoader, DataLoaderError, LoadDataResult
|
|
6
|
+
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
7
|
+
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
8
8
|
from lm_eval.tasks import get_task_dict
|
|
9
9
|
from lm_eval.tasks import TaskManager as LMTaskManager
|
|
10
|
-
from
|
|
10
|
+
from wisent.core.contrastive_pairs.lm_eval_pairs.lm_task_pairs_generation import (
|
|
11
11
|
lm_build_contrastive_pairs,
|
|
12
12
|
)
|
|
13
13
|
|
|
@@ -911,7 +911,7 @@ class FullBenchmarkDownloader:
|
|
|
911
911
|
def _convert_mbpp_format(self, sample: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
912
912
|
"""Convert MBPP format (programming problems with code)."""
|
|
913
913
|
# Use the benchmark extractor to get contrastive pairs
|
|
914
|
-
from
|
|
914
|
+
from wisent.core.benchmark_extractors import extract_contrastive_pair
|
|
915
915
|
|
|
916
916
|
try:
|
|
917
917
|
contrastive_data = extract_contrastive_pair("mbpp", sample, None)
|
|
@@ -992,7 +992,7 @@ class FullBenchmarkDownloader:
|
|
|
992
992
|
return []
|
|
993
993
|
|
|
994
994
|
# Use the HLE extractor to get contrastive pairs
|
|
995
|
-
from
|
|
995
|
+
from wisent.core.benchmark_extractors import HLEExtractor
|
|
996
996
|
|
|
997
997
|
try:
|
|
998
998
|
extractor = HLEExtractor()
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import sys
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
from
|
|
6
|
+
from wisent.core.evaluators.core.atoms import BaseEvaluator, EvalResult, EvaluatorError
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
9
|
"InteractiveEvaluator",
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import Any, Optional
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from wisent.core.evaluators.core.atoms import BaseEvaluator, EvalResult
|
|
6
6
|
|
|
7
7
|
__all__ = [
|
|
8
8
|
"UserSpecifiedEvaluator",
|
|
@@ -7,8 +7,8 @@ This module provides ground truth evaluation using the lm-eval-harness framework
|
|
|
7
7
|
import logging
|
|
8
8
|
from typing import Any, Dict
|
|
9
9
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
10
|
+
from wisent.core.activations import ActivationAggregationStrategy, Activations
|
|
11
|
+
from wisent.core.layer import Layer
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -10,8 +10,8 @@ directly on each choice to evaluate performance against known ground truth.
|
|
|
10
10
|
import logging
|
|
11
11
|
from typing import Any, Dict, Optional
|
|
12
12
|
|
|
13
|
-
from
|
|
14
|
-
from
|
|
13
|
+
from wisent.core.activations import ActivationAggregationStrategy, Activations
|
|
14
|
+
from wisent.core.layer import Layer
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
@@ -87,10 +87,10 @@ class LogLikelihoodsEvaluator:
|
|
|
87
87
|
logger.info(f"Extracted {len(qa_pairs)} QA pairs from {task_name}")
|
|
88
88
|
|
|
89
89
|
# Use existing contrastive pair creation infrastructure
|
|
90
|
-
from
|
|
90
|
+
from wisent.core.activations.activation_collection_method import (
|
|
91
91
|
ActivationCollectionLogic,
|
|
92
92
|
)
|
|
93
|
-
from
|
|
93
|
+
from wisent.core.activations.prompts import PromptConstructionStrategy
|
|
94
94
|
|
|
95
95
|
collector = ActivationCollectionLogic(model=evaluation_model)
|
|
96
96
|
|
wisent/core/models/core/atoms.py
CHANGED
|
@@ -16,12 +16,12 @@ from transformers import (
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
from
|
|
20
|
-
from
|
|
19
|
+
from wisent.core.models.core.atoms import SteeringPlan, SteeringVector, HookHandleGroup, GenerationStats, TopLogits
|
|
20
|
+
from wisent.core.activations.core.atoms import RawActivationMap
|
|
21
21
|
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
22
|
+
from wisent.core.prompts.core.atom import ChatMessage
|
|
23
|
+
from wisent.core.utils.device import resolve_default_device, resolve_torch_device
|
|
24
|
+
from wisent.core.contrastive_pairs.diagnostics import run_control_steering_diagnostics
|
|
25
25
|
|
|
26
26
|
import threading
|
|
27
27
|
|
wisent/core/optuna/__init__.py
CHANGED
|
@@ -21,7 +21,7 @@ Key components:
|
|
|
21
21
|
|
|
22
22
|
# Steering optimization components
|
|
23
23
|
# Classifier optimization components
|
|
24
|
-
from
|
|
24
|
+
from wisent.core.optuna.classifier import (
|
|
25
25
|
ActivationGenerator,
|
|
26
26
|
CacheConfig,
|
|
27
27
|
ClassifierCache,
|
|
@@ -30,13 +30,13 @@ from wisent_guard.core.optuna.classifier import (
|
|
|
30
30
|
OptimizationResult,
|
|
31
31
|
OptunaClassifierOptimizer,
|
|
32
32
|
)
|
|
33
|
-
from
|
|
33
|
+
from wisent.core.optuna.steering.metrics import (
|
|
34
34
|
calculate_comprehensive_metrics,
|
|
35
35
|
evaluate_benchmark_performance,
|
|
36
36
|
evaluate_probe_performance,
|
|
37
37
|
generate_performance_summary,
|
|
38
38
|
)
|
|
39
|
-
from
|
|
39
|
+
from wisent.core.optuna.steering.optuna_pipeline import OptimizationConfig, OptimizationPipeline
|
|
40
40
|
|
|
41
41
|
__all__ = [
|
|
42
42
|
# Steering optimization
|
|
@@ -15,8 +15,8 @@ from typing import Any, Optional
|
|
|
15
15
|
import numpy as np
|
|
16
16
|
import torch
|
|
17
17
|
|
|
18
|
-
from
|
|
19
|
-
from
|
|
18
|
+
from wisent.core.activations.activation_collection_method import ActivationCollectionLogic
|
|
19
|
+
from wisent.core.activations.core import ActivationAggregationStrategy, Activations
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
@@ -16,8 +16,8 @@ import torch
|
|
|
16
16
|
from optuna.pruners import MedianPruner
|
|
17
17
|
from optuna.samplers import TPESampler
|
|
18
18
|
|
|
19
|
-
from
|
|
20
|
-
from
|
|
19
|
+
from wisent.core.classifier.classifier import Classifier
|
|
20
|
+
from wisent.core.utils.device import resolve_default_device
|
|
21
21
|
|
|
22
22
|
from .activation_generator import ActivationData, ActivationGenerator, GenerationConfig
|
|
23
23
|
from .classifier_cache import CacheConfig, ClassifierCache
|
|
@@ -8,9 +8,9 @@ evaluation with the optuna optimization pipeline.
|
|
|
8
8
|
import logging
|
|
9
9
|
from typing import Any, Dict, List, Tuple
|
|
10
10
|
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
11
|
+
from wisent.core.bigcode_extractors import get_bigcode_extractor
|
|
12
|
+
from wisent.core.bigcode_integration import BigCodeEvaluator, is_bigcode_task
|
|
13
|
+
from wisent.parameters.task_config import CODING_TASKS
|
|
14
14
|
|
|
15
15
|
logger = logging.getLogger(__name__)
|
|
16
16
|
|
|
@@ -11,11 +11,11 @@ from tqdm import tqdm
|
|
|
11
11
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
12
12
|
|
|
13
13
|
# Import LMEvalHarnessGroundTruth for intelligent evaluation (same approach as CLI)
|
|
14
|
-
from
|
|
14
|
+
from wisent.core.lm_eval_harness_ground_truth import LMEvalHarnessGroundTruth
|
|
15
15
|
|
|
16
16
|
# Import task interface for dynamic task loading
|
|
17
|
-
from
|
|
18
|
-
from
|
|
17
|
+
from wisent.core.task_interface import get_task
|
|
18
|
+
from wisent.core.utils.device import empty_device_cache, preferred_dtype, resolve_default_device
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
@@ -8,13 +8,13 @@ from typing import Any, Callable, Dict, List, Optional
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
|
|
10
10
|
|
|
11
|
-
from
|
|
11
|
+
from wisent.core.bigcode_extractors import MBPPExtractor
|
|
12
12
|
|
|
13
13
|
# Import LMEvalHarnessGroundTruth for intelligent evaluation (newer approach used by CLI)
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
14
|
+
from wisent.core.lm_eval_harness_ground_truth import LMEvalHarnessGroundTruth
|
|
15
|
+
from wisent.core.task_interface import get_task
|
|
16
|
+
from wisent.core.tasks.file_task import FileTask
|
|
17
|
+
from wisent.parameters.task_config import CODING_TASKS
|
|
18
18
|
|
|
19
19
|
from .bigcode_evaluator_wrapper import OptunaBigCodeEvaluator
|
|
20
20
|
|
|
@@ -39,13 +39,13 @@ try:
|
|
|
39
39
|
WANDB_AVAILABLE = True
|
|
40
40
|
except ImportError:
|
|
41
41
|
WANDB_AVAILABLE = False
|
|
42
|
-
from
|
|
43
|
-
from
|
|
44
|
-
from
|
|
45
|
-
from
|
|
46
|
-
from
|
|
47
|
-
from
|
|
48
|
-
from
|
|
42
|
+
from wisent.core.contrastive_pairs.contrastive_pair import ContrastivePair
|
|
43
|
+
from wisent.core.contrastive_pairs.contrastive_pair_set import ContrastivePairSet
|
|
44
|
+
from wisent.core.optuna.steering import data_utils, metrics
|
|
45
|
+
from wisent.core.response import Response
|
|
46
|
+
from wisent.core.steering_methods.dac import DAC
|
|
47
|
+
from wisent.core.task_interface import get_task
|
|
48
|
+
from wisent.core.utils.device import empty_device_cache, preferred_dtype, resolve_default_device, resolve_device
|
|
49
49
|
|
|
50
50
|
logger = logging.getLogger(__name__)
|
|
51
51
|
|
|
@@ -559,7 +559,7 @@ class OptimizationPipeline:
|
|
|
559
559
|
|
|
560
560
|
if method_name == "caa":
|
|
561
561
|
# Create CAA instance
|
|
562
|
-
from
|
|
562
|
+
from wisent.core.steering_methods.caa import CAA
|
|
563
563
|
|
|
564
564
|
caa = CAA(device=self.device)
|
|
565
565
|
|
|
@@ -14,21 +14,21 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
14
14
|
import torch
|
|
15
15
|
from tqdm import tqdm
|
|
16
16
|
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
17
|
+
from wisent.core.activations.core import ActivationAggregationStrategy
|
|
18
|
+
from wisent.core.classifier.classifier import Classifier
|
|
19
|
+
from wisent.core.contrastive_pairs.contrastive_pair import ContrastivePair
|
|
20
|
+
from wisent.core.contrastive_pairs.contrastive_pair_set import ContrastivePairSet
|
|
21
|
+
from wisent.core.optuna.classifier import (
|
|
22
22
|
CacheConfig,
|
|
23
23
|
ClassifierCache,
|
|
24
24
|
ClassifierOptimizationConfig,
|
|
25
25
|
GenerationConfig,
|
|
26
26
|
OptunaClassifierOptimizer,
|
|
27
27
|
)
|
|
28
|
-
from
|
|
29
|
-
from
|
|
30
|
-
from
|
|
31
|
-
from
|
|
28
|
+
from wisent.core.optuna.steering import data_utils, metrics
|
|
29
|
+
from wisent.core.response import Response
|
|
30
|
+
from wisent.core.steering_methods.dac import DAC
|
|
31
|
+
from wisent.core.task_interface import get_task
|
|
32
32
|
|
|
33
33
|
logger = logging.getLogger(__name__)
|
|
34
34
|
|
|
@@ -5,7 +5,7 @@ import inspect
|
|
|
5
5
|
import pkgutil
|
|
6
6
|
from typing import Iterable, Type
|
|
7
7
|
|
|
8
|
-
from
|
|
8
|
+
from wisent.core.prompts.core.atom import PromptPair, PromptStrategy, UnknownStrategyError
|
|
9
9
|
|
|
10
10
|
__all__ = ["PromptFormatter", "StrategyKey", "UnknownStrategyError"]
|
|
11
11
|
|
|
@@ -13,7 +13,7 @@ import matplotlib.pyplot as plt
|
|
|
13
13
|
import numpy as np
|
|
14
14
|
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
|
|
15
15
|
|
|
16
|
-
from
|
|
16
|
+
from wisent.core.classifier.classifier import Classifier
|
|
17
17
|
|
|
18
18
|
from .activations import ActivationAggregationStrategy
|
|
19
19
|
from .contrastive_pairs import ContrastivePairSet
|
|
@@ -171,7 +171,7 @@ class SampleSizeOptimizer:
|
|
|
171
171
|
logger.info(f"Extracted {len(qa_pairs)} QA pairs")
|
|
172
172
|
|
|
173
173
|
# Create contrastive pairs from QA pairs
|
|
174
|
-
from
|
|
174
|
+
from wisent.core.activations.activation_collection_method import ActivationCollectionLogic
|
|
175
175
|
|
|
176
176
|
collector = ActivationCollectionLogic(model=self.model)
|
|
177
177
|
|
wisent/core/steering.py
CHANGED
|
@@ -7,8 +7,8 @@ from typing import Any, Dict, List, Optional, Union
|
|
|
7
7
|
import torch
|
|
8
8
|
import torch.nn.functional as F
|
|
9
9
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
10
|
+
from wisent.core.activations import Activations
|
|
11
|
+
from wisent.core.classifier.classifier import Classifier
|
|
12
12
|
|
|
13
13
|
from .contrastive_pairs import ContrastivePairSet
|
|
14
14
|
from .steering_method import CAA
|
|
@@ -7,8 +7,8 @@ import inspect
|
|
|
7
7
|
|
|
8
8
|
import torch
|
|
9
9
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
10
|
+
from wisent.core.activations.core.atoms import LayerActivations, RawActivationMap, LayerName
|
|
11
|
+
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
12
12
|
|
|
13
13
|
__all__ = [
|
|
14
14
|
"SteeringError",
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from typing import List
|
|
4
4
|
import torch
|
|
5
5
|
|
|
6
|
-
from
|
|
6
|
+
from wisent.core.steering_methods.core.atoms import PerLayerBaseSteeringMethod
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
9
|
"CAAMethod",
|
|
@@ -806,7 +806,7 @@ class SteeringOptimizer:
|
|
|
806
806
|
"""
|
|
807
807
|
try:
|
|
808
808
|
# Import CLI runner to test configuration
|
|
809
|
-
from
|
|
809
|
+
from wisent.cli import run_task_pipeline
|
|
810
810
|
|
|
811
811
|
# Prepare kwargs with method-specific parameters
|
|
812
812
|
kwargs = {
|
|
@@ -1270,7 +1270,7 @@ def get_optimal_steering_params(
|
|
|
1270
1270
|
# The following integration points need to be implemented:
|
|
1271
1271
|
#
|
|
1272
1272
|
# 1. CAA Integration:
|
|
1273
|
-
# - Load existing CAA implementation from
|
|
1273
|
+
# - Load existing CAA implementation from wisent.core.steering_methods.caa
|
|
1274
1274
|
# - Implement parameter optimization for CAA vectors
|
|
1275
1275
|
# - Measure CAA steering effectiveness
|
|
1276
1276
|
#
|