wisent 0.5.12__py3-none-any.whl → 0.5.14__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wisent might be problematic. Click here for more details.

Files changed (227)
  1. wisent/__init__.py +1 -1
  2. wisent/core/activations/__init__.py +26 -0
  3. wisent/core/activations/activations.py +96 -0
  4. wisent/core/activations/activations_collector.py +71 -20
  5. wisent/core/activations/prompt_construction_strategy.py +47 -0
  6. wisent/core/agent/__init__.py +1 -18
  7. wisent/core/agent/budget.py +2 -2
  8. wisent/core/agent/device_benchmarks.py +1 -1
  9. wisent/core/agent/diagnose/__init__.py +1 -55
  10. wisent/core/agent/diagnose/classifier_marketplace.py +8 -8
  11. wisent/core/agent/diagnose/response_diagnostics.py +4 -4
  12. wisent/core/agent/diagnose/synthetic_classifier_option.py +1 -1
  13. wisent/core/agent/diagnose/tasks/task_manager.py +3 -3
  14. wisent/core/agent/diagnose.py +2 -1
  15. wisent/core/autonomous_agent.py +10 -2
  16. wisent/core/benchmark_extractors.py +293 -0
  17. wisent/core/bigcode_integration.py +20 -7
  18. wisent/core/branding.py +108 -0
  19. wisent/core/cli/__init__.py +15 -0
  20. wisent/core/cli/create_steering_vector.py +138 -0
  21. wisent/core/cli/evaluate_responses.py +715 -0
  22. wisent/core/cli/generate_pairs.py +128 -0
  23. wisent/core/cli/generate_pairs_from_task.py +119 -0
  24. wisent/core/cli/generate_responses.py +129 -0
  25. wisent/core/cli/generate_vector_from_synthetic.py +149 -0
  26. wisent/core/cli/generate_vector_from_task.py +147 -0
  27. wisent/core/cli/get_activations.py +191 -0
  28. wisent/core/cli/optimize_classification.py +339 -0
  29. wisent/core/cli/optimize_steering.py +364 -0
  30. wisent/core/cli/tasks.py +182 -0
  31. wisent/core/cli_logger.py +22 -0
  32. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +27 -1
  33. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +49 -1
  34. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +115 -0
  35. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +115 -0
  36. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +111 -0
  37. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +119 -0
  38. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +115 -0
  39. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +114 -0
  40. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +118 -0
  41. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +146 -0
  42. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +129 -0
  43. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +119 -0
  44. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +112 -0
  45. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +113 -0
  46. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livecodebench.py +367 -0
  47. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +115 -0
  48. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +114 -0
  49. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +113 -0
  50. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +112 -0
  51. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +111 -0
  52. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +114 -0
  53. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +113 -0
  54. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +115 -0
  55. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +111 -0
  56. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +113 -0
  57. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +113 -0
  58. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +112 -0
  59. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +116 -0
  60. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +115 -0
  61. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +111 -0
  62. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +111 -0
  63. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +121 -0
  64. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +121 -0
  65. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +111 -0
  66. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +110 -0
  67. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +114 -0
  68. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad2.py +124 -0
  69. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sst2.py +111 -0
  70. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +112 -0
  71. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +127 -0
  72. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_gen.py +112 -0
  73. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +117 -0
  74. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +117 -0
  75. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +127 -0
  76. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +119 -0
  77. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +1 -1
  78. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +111 -0
  79. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +114 -0
  80. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +112 -0
  81. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +114 -0
  82. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +114 -0
  83. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +1 -1
  84. wisent/core/data_loaders/__init__.py +235 -0
  85. wisent/core/data_loaders/loaders/lm_loader.py +2 -2
  86. wisent/core/data_loaders/loaders/task_interface_loader.py +300 -0
  87. wisent/{cli/data_loaders/data_loader_rotator.py → core/data_loaders/rotator.py} +1 -1
  88. wisent/core/download_full_benchmarks.py +79 -2
  89. wisent/core/evaluators/benchmark_specific/__init__.py +26 -0
  90. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/evaluator.py +17 -17
  91. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/cpp_sanitizer.py +2 -2
  92. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/java_sanitizer.py +2 -2
  93. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/python_sanitizer.py +2 -2
  94. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/__init__.py +3 -0
  95. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/provider.py +305 -0
  96. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/runtime.py +36 -4
  97. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/entrypoint.py +2 -4
  98. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/recipes.py +1 -1
  99. wisent/core/evaluators/benchmark_specific/coding/solution_generator.py +258 -0
  100. wisent/core/evaluators/benchmark_specific/exact_match_evaluator.py +79 -0
  101. wisent/core/evaluators/benchmark_specific/f1_evaluator.py +101 -0
  102. wisent/core/evaluators/benchmark_specific/generation_evaluator.py +197 -0
  103. wisent/core/{log_likelihoods_evaluator.py → evaluators/benchmark_specific/log_likelihoods_evaluator.py} +10 -2
  104. wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py +140 -0
  105. wisent/core/evaluators/benchmark_specific/personalization_evaluator.py +250 -0
  106. wisent/{cli/evaluators/evaluator_rotator.py → core/evaluators/rotator.py} +4 -4
  107. wisent/core/lm_eval_harness_ground_truth.py +3 -2
  108. wisent/core/main.py +57 -0
  109. wisent/core/model_persistence.py +2 -2
  110. wisent/core/models/wisent_model.py +6 -6
  111. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
  112. wisent/core/optuna/steering/steering_optimization.py +1 -1
  113. wisent/core/parser_arguments/__init__.py +10 -0
  114. wisent/core/parser_arguments/agent_parser.py +110 -0
  115. wisent/core/parser_arguments/configure_model_parser.py +7 -0
  116. wisent/core/parser_arguments/create_steering_vector_parser.py +59 -0
  117. wisent/core/parser_arguments/evaluate_parser.py +40 -0
  118. wisent/core/parser_arguments/evaluate_responses_parser.py +10 -0
  119. wisent/core/parser_arguments/full_optimize_parser.py +115 -0
  120. wisent/core/parser_arguments/generate_pairs_from_task_parser.py +33 -0
  121. wisent/core/parser_arguments/generate_pairs_parser.py +29 -0
  122. wisent/core/parser_arguments/generate_responses_parser.py +15 -0
  123. wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +127 -0
  124. wisent/core/parser_arguments/generate_vector_from_task_parser.py +127 -0
  125. wisent/core/parser_arguments/generate_vector_parser.py +90 -0
  126. wisent/core/parser_arguments/get_activations_parser.py +90 -0
  127. wisent/core/parser_arguments/main_parser.py +152 -0
  128. wisent/core/parser_arguments/model_config_parser.py +59 -0
  129. wisent/core/parser_arguments/monitor_parser.py +17 -0
  130. wisent/core/parser_arguments/multi_steer_parser.py +47 -0
  131. wisent/core/parser_arguments/optimize_classification_parser.py +67 -0
  132. wisent/core/parser_arguments/optimize_sample_size_parser.py +58 -0
  133. wisent/core/parser_arguments/optimize_steering_parser.py +147 -0
  134. wisent/core/parser_arguments/synthetic_parser.py +93 -0
  135. wisent/core/parser_arguments/tasks_parser.py +584 -0
  136. wisent/core/parser_arguments/test_nonsense_parser.py +26 -0
  137. wisent/core/parser_arguments/utils.py +111 -0
  138. wisent/core/prompts/core/prompt_formater.py +3 -3
  139. wisent/core/prompts/prompt_stratiegies/direct_completion.py +2 -0
  140. wisent/core/prompts/prompt_stratiegies/instruction_following.py +2 -0
  141. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +2 -0
  142. wisent/core/prompts/prompt_stratiegies/role_playing.py +2 -0
  143. wisent/{cli/steering_methods/steering_rotator.py → core/steering_methods/rotator.py} +4 -4
  144. wisent/core/steering_optimizer.py +45 -21
  145. wisent/{synthetic → core/synthetic}/cleaners/deduper_cleaner.py +3 -3
  146. wisent/{synthetic → core/synthetic}/cleaners/methods/base_dedupers.py +2 -2
  147. wisent/{synthetic → core/synthetic}/cleaners/methods/base_refusalers.py +1 -1
  148. wisent/{synthetic → core/synthetic}/cleaners/pairs_cleaner.py +5 -5
  149. wisent/{synthetic → core/synthetic}/cleaners/refusaler_cleaner.py +4 -4
  150. wisent/{synthetic → core/synthetic}/db_instructions/mini_dp.py +1 -1
  151. wisent/{synthetic → core/synthetic}/generators/diversities/methods/fast_diversity.py +1 -1
  152. wisent/{synthetic → core/synthetic}/generators/pairs_generator.py +38 -12
  153. wisent/core/tasks/livecodebench_task.py +4 -103
  154. wisent/core/timing_calibration.py +1 -1
  155. {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/METADATA +3 -3
  156. wisent-0.5.14.dist-info/RECORD +294 -0
  157. wisent-0.5.14.dist-info/entry_points.txt +2 -0
  158. wisent/benchmarks/coding/providers/livecodebench/provider.py +0 -53
  159. wisent/classifiers/core/atoms.py +0 -747
  160. wisent/classifiers/models/logistic.py +0 -29
  161. wisent/classifiers/models/mlp.py +0 -47
  162. wisent/cli/classifiers/classifier_rotator.py +0 -137
  163. wisent/cli/cli_logger.py +0 -142
  164. wisent/cli/wisent_cli/commands/help_cmd.py +0 -52
  165. wisent/cli/wisent_cli/commands/listing.py +0 -154
  166. wisent/cli/wisent_cli/commands/train_cmd.py +0 -322
  167. wisent/cli/wisent_cli/main.py +0 -93
  168. wisent/cli/wisent_cli/shell.py +0 -80
  169. wisent/cli/wisent_cli/ui.py +0 -69
  170. wisent/cli/wisent_cli/util/aggregations.py +0 -43
  171. wisent/cli/wisent_cli/util/parsing.py +0 -126
  172. wisent/cli/wisent_cli/version.py +0 -4
  173. wisent/opti/methods/__init__.py +0 -0
  174. wisent/synthetic/__init__.py +0 -0
  175. wisent/synthetic/cleaners/__init__.py +0 -0
  176. wisent/synthetic/cleaners/core/__init__.py +0 -0
  177. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  178. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  179. wisent/synthetic/db_instructions/__init__.py +0 -0
  180. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  181. wisent/synthetic/generators/__init__.py +0 -0
  182. wisent/synthetic/generators/core/__init__.py +0 -0
  183. wisent/synthetic/generators/diversities/__init__.py +0 -0
  184. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  185. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  186. wisent-0.5.12.dist-info/RECORD +0 -220
  187. /wisent/{benchmarks → core/evaluators/benchmark_specific/coding}/__init__.py +0 -0
  188. /wisent/{benchmarks/coding → core/evaluators/benchmark_specific/coding/metrics}/__init__.py +0 -0
  189. /wisent/{benchmarks/coding/metrics → core/evaluators/benchmark_specific/coding/metrics/core}/__init__.py +0 -0
  190. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/core/atoms.py +0 -0
  191. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/passk.py +0 -0
  192. /wisent/{benchmarks/coding/metrics/core → core/evaluators/benchmark_specific/coding/output_sanitizer}/__init__.py +0 -0
  193. /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/output_sanitizer/core}/__init__.py +0 -0
  194. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/core/atoms.py +0 -0
  195. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/utils.py +0 -0
  196. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/__init__.py +0 -0
  197. /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/providers}/core/__init__.py +0 -0
  198. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/core/atoms.py +0 -0
  199. /wisent/{benchmarks/coding/providers/core → core/evaluators/benchmark_specific/coding/safe_docker}/__init__.py +0 -0
  200. /wisent/{benchmarks/coding/providers/livecodebench → core/evaluators/benchmark_specific/coding/safe_docker/core}/__init__.py +0 -0
  201. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/atoms.py +0 -0
  202. /wisent/{benchmarks/coding/safe_docker → core/opti}/__init__.py +0 -0
  203. /wisent/{benchmarks/coding/safe_docker → core/opti}/core/__init__.py +0 -0
  204. /wisent/{opti → core/opti}/core/atoms.py +0 -0
  205. /wisent/{classifiers → core/opti/methods}/__init__.py +0 -0
  206. /wisent/{opti → core/opti}/methods/opti_classificator.py +0 -0
  207. /wisent/{opti → core/opti}/methods/opti_steering.py +0 -0
  208. /wisent/{classifiers/core → core/synthetic}/__init__.py +0 -0
  209. /wisent/{classifiers/models → core/synthetic/cleaners}/__init__.py +0 -0
  210. /wisent/{cli → core/synthetic/cleaners/core}/__init__.py +0 -0
  211. /wisent/{synthetic → core/synthetic}/cleaners/core/atoms.py +0 -0
  212. /wisent/{cli/classifiers → core/synthetic/cleaners/methods}/__init__.py +0 -0
  213. /wisent/{cli/data_loaders → core/synthetic/cleaners/methods/core}/__init__.py +0 -0
  214. /wisent/{synthetic → core/synthetic}/cleaners/methods/core/atoms.py +0 -0
  215. /wisent/{cli/evaluators → core/synthetic/db_instructions}/__init__.py +0 -0
  216. /wisent/{cli/steering_methods → core/synthetic/db_instructions/core}/__init__.py +0 -0
  217. /wisent/{synthetic → core/synthetic}/db_instructions/core/atoms.py +0 -0
  218. /wisent/{cli/wisent_cli → core/synthetic/generators}/__init__.py +0 -0
  219. /wisent/{cli/wisent_cli/commands → core/synthetic/generators/core}/__init__.py +0 -0
  220. /wisent/{synthetic → core/synthetic}/generators/core/atoms.py +0 -0
  221. /wisent/{cli/wisent_cli/util → core/synthetic/generators/diversities}/__init__.py +0 -0
  222. /wisent/{opti → core/synthetic/generators/diversities/core}/__init__.py +0 -0
  223. /wisent/{synthetic → core/synthetic}/generators/diversities/core/core.py +0 -0
  224. /wisent/{opti/core → core/synthetic/generators/diversities/methods}/__init__.py +0 -0
  225. {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/WHEEL +0 -0
  226. {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/licenses/LICENSE +0 -0
  227. {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/top_level.txt +0 -0
wisent/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.5.12"
1
+ __version__ = "0.5.14"
@@ -0,0 +1,26 @@
1
+ """Activation collection and management."""
2
+
3
+ from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
4
+ from wisent.core.activations.core.atoms import (
5
+ ActivationAggregationStrategy,
6
+ LayerActivations,
7
+ )
8
+
9
+ __all__ = [
10
+ "ActivationCollector",
11
+ "Activations",
12
+ "PromptConstructionStrategy",
13
+ "ActivationAggregationStrategy",
14
+ "LayerActivations",
15
+ ]
16
+
17
+
18
+ def __getattr__(name):
19
+ """Lazy import to avoid circular dependencies."""
20
+ if name == "ActivationCollector":
21
+ from wisent.core.activations.activations_collector import ActivationCollector
22
+ return ActivationCollector
23
+ if name == "Activations":
24
+ from wisent.core.activations.activations import Activations
25
+ return Activations
26
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -0,0 +1,96 @@
1
+ """Activation wrapper for classifier feature extraction."""
2
+
3
+ from typing import Any
4
+ import torch
5
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
6
+
7
+
8
+ class Activations:
9
+ """Wrapper for activation tensors with aggregation strategy.
10
+
11
+ This class wraps activation tensors and provides methods to extract
12
+ features for classifier input based on the specified aggregation strategy.
13
+ """
14
+
15
+ def __init__(self, tensor: torch.Tensor, layer: Any, aggregation_strategy):
16
+ """Initialize Activations wrapper.
17
+
18
+ Args:
19
+ tensor: Activation tensor (typically shape [batch, seq_len, hidden_dim])
20
+ layer: Layer object containing layer metadata
21
+ aggregation_strategy: Strategy for aggregating tokens (string or ActivationAggregationStrategy enum)
22
+ """
23
+ self.tensor = tensor
24
+ self.layer = layer
25
+
26
+ # Convert string to enum if needed
27
+ if isinstance(aggregation_strategy, str):
28
+ # Map common string values to enum
29
+ strategy_map = {
30
+ "average": ActivationAggregationStrategy.MEAN_POOLING,
31
+ "mean": ActivationAggregationStrategy.MEAN_POOLING,
32
+ "final": ActivationAggregationStrategy.LAST_TOKEN,
33
+ "last": ActivationAggregationStrategy.LAST_TOKEN,
34
+ "first": ActivationAggregationStrategy.FIRST_TOKEN,
35
+ "max": ActivationAggregationStrategy.MAX_POOLING,
36
+ "mean_pooling": ActivationAggregationStrategy.MEAN_POOLING,
37
+ "last_token": ActivationAggregationStrategy.LAST_TOKEN,
38
+ "first_token": ActivationAggregationStrategy.FIRST_TOKEN,
39
+ "max_pooling": ActivationAggregationStrategy.MAX_POOLING,
40
+ }
41
+ self.aggregation_strategy = strategy_map.get(
42
+ aggregation_strategy.lower(),
43
+ ActivationAggregationStrategy.MEAN_POOLING
44
+ )
45
+ else:
46
+ self.aggregation_strategy = aggregation_strategy
47
+
48
+ def extract_features_for_classifier(self) -> torch.Tensor:
49
+ """Extract features from activations for classifier input.
50
+
51
+ Aggregates the activation tensor based on the specified strategy
52
+ to produce a single feature vector suitable for classifier input.
53
+
54
+ Returns:
55
+ torch.Tensor: Aggregated features (typically shape [hidden_dim])
56
+ """
57
+ if self.tensor is None:
58
+ raise ValueError("Cannot extract features from None tensor")
59
+
60
+ # Ensure tensor is 3D: [batch, seq_len, hidden_dim]
61
+ if len(self.tensor.shape) == 2:
62
+ # If [seq_len, hidden_dim], add batch dimension
63
+ tensor = self.tensor.unsqueeze(0)
64
+ else:
65
+ tensor = self.tensor
66
+
67
+ # Apply aggregation strategy
68
+ if self.aggregation_strategy == ActivationAggregationStrategy.MEAN_POOLING:
69
+ # Average over sequence length dimension
70
+ features = tensor.mean(dim=1).squeeze(0)
71
+ elif self.aggregation_strategy == ActivationAggregationStrategy.LAST_TOKEN:
72
+ # Take last token
73
+ features = tensor[:, -1, :].squeeze(0)
74
+ elif self.aggregation_strategy == ActivationAggregationStrategy.FIRST_TOKEN:
75
+ # Take first token
76
+ features = tensor[:, 0, :].squeeze(0)
77
+ elif self.aggregation_strategy == ActivationAggregationStrategy.MAX_POOLING:
78
+ # Max over sequence length dimension
79
+ features = tensor.max(dim=1)[0].squeeze(0)
80
+ else:
81
+ # Default to mean pooling
82
+ features = tensor.mean(dim=1).squeeze(0)
83
+
84
+ return features
85
+
86
+ def cpu(self):
87
+ """Move tensor to CPU."""
88
+ if self.tensor is not None:
89
+ self.tensor = self.tensor.cpu()
90
+ return self
91
+
92
+ def detach(self):
93
+ """Detach tensor from computation graph."""
94
+ if self.tensor is not None:
95
+ self.tensor = self.tensor.detach()
96
+ return self
@@ -1,12 +1,16 @@
1
1
  from __future__ import annotations
2
2
  from dataclasses import dataclass
3
- from typing import Sequence
3
+ from typing import Sequence, TYPE_CHECKING
4
4
  import torch
5
5
 
6
6
 
7
7
  from wisent.core.contrastive_pairs.core.pair import ContrastivePair
8
8
  from wisent.core.activations.core.atoms import LayerActivations, ActivationAggregationStrategy, LayerName, RawActivationMap
9
- from wisent.core.models.wisent_model import WisentModel
9
+ from wisent.core.activations.prompt_construction_strategy import PromptConstructionStrategy
10
+
11
+ if TYPE_CHECKING:
12
+ from wisent.core.models.wisent_model import WisentModel
13
+
10
14
  __all__ = ["ActivationCollector"]
11
15
 
12
16
  @dataclass(slots=True)
@@ -125,22 +129,23 @@ class ActivationCollector:
125
129
  }
126
130
  """
127
131
 
128
- model: WisentModel
132
+ model: "WisentModel"
129
133
  store_device: str | torch.device = "cpu"
130
134
  dtype: torch.dtype | None = None
131
135
 
132
136
  def collect_for_pair(
133
137
  self,
134
138
  pair: ContrastivePair,
135
- layers: Sequence[LayerName] | None = None,
139
+ layers: Sequence[LayerName] | None = None,
136
140
  aggregation: ActivationAggregationStrategy = ActivationAggregationStrategy.CONTINUATION_TOKEN,
137
141
  return_full_sequence: bool = False,
138
142
  normalize_layers: bool = False,
143
+ prompt_strategy: PromptConstructionStrategy = PromptConstructionStrategy.CHAT_TEMPLATE,
139
144
  ) -> ContrastivePair:
140
145
  pos = self._collect_for_texts(pair.prompt, _resp_text(pair.positive_response),
141
- layers, aggregation, return_full_sequence, normalize_layers)
146
+ layers, aggregation, return_full_sequence, normalize_layers, prompt_strategy)
142
147
  neg = self._collect_for_texts(pair.prompt, _resp_text(pair.negative_response),
143
- layers, aggregation, return_full_sequence, normalize_layers)
148
+ layers, aggregation, return_full_sequence, normalize_layers, prompt_strategy)
144
149
  return pair.with_activations(positive=pos, negative=neg)
145
150
 
146
151
  def _collect_for_texts(
@@ -151,25 +156,16 @@ class ActivationCollector:
151
156
  aggregation: ActivationAggregationStrategy,
152
157
  return_full_sequence: bool,
153
158
  normalize_layers: bool = False,
159
+ prompt_strategy: PromptConstructionStrategy = PromptConstructionStrategy.CHAT_TEMPLATE,
154
160
  ) -> LayerActivations:
155
-
161
+
156
162
  self._ensure_eval_mode()
157
163
  with torch.inference_mode():
158
164
  tok = self.model.tokenizer # type: ignore[union-attr]
159
- if not hasattr(tok, "apply_chat_template"):
160
- raise RuntimeError("Tokenizer has no apply_chat_template; set it up or use a non-chat path.")
161
165
 
162
- # 1) Build templated strings
163
- prompt_text = tok.apply_chat_template(
164
- [{"role": "user", "content": prompt}],
165
- tokenize=False,
166
- add_generation_prompt=True,
167
- )
168
- full_text = tok.apply_chat_template(
169
- [{"role": "user", "content": prompt},
170
- {"role": "assistant", "content": response}],
171
- tokenize=False,
172
- add_generation_prompt=False,
166
+ # 1) Build prompts based on strategy
167
+ prompt_text, full_text = self._build_prompts_for_strategy(
168
+ prompt, response, prompt_strategy, tok
173
169
  )
174
170
 
175
171
  # 2) Tokenize both with identical flags
@@ -217,6 +213,61 @@ class ActivationCollector:
217
213
  activation_aggregation_strategy=None if return_full_sequence else aggregation,
218
214
  )
219
215
 
216
+ def _build_prompts_for_strategy(
217
+ self,
218
+ prompt: str,
219
+ response: str,
220
+ strategy: PromptConstructionStrategy,
221
+ tokenizer
222
+ ) -> tuple[str, str]:
223
+ """
224
+ Build prompt_text and full_text based on the chosen prompt construction strategy.
225
+
226
+ Returns:
227
+ (prompt_text, full_text): Tuple of prompt-only text and prompt+response text
228
+ """
229
+ if strategy == PromptConstructionStrategy.CHAT_TEMPLATE:
230
+ # Use model's built-in chat template
231
+ if not hasattr(tokenizer, "apply_chat_template"):
232
+ raise RuntimeError("Tokenizer has no apply_chat_template; set it up or use a different strategy.")
233
+ prompt_text = tokenizer.apply_chat_template(
234
+ [{"role": "user", "content": prompt}],
235
+ tokenize=False,
236
+ add_generation_prompt=True,
237
+ )
238
+ full_text = tokenizer.apply_chat_template(
239
+ [{"role": "user", "content": prompt},
240
+ {"role": "assistant", "content": response}],
241
+ tokenize=False,
242
+ add_generation_prompt=False,
243
+ )
244
+
245
+ elif strategy == PromptConstructionStrategy.DIRECT_COMPLETION:
246
+ # Q → good_resp/bad_resp (direct answer)
247
+ prompt_text = prompt
248
+ full_text = f"{prompt} {response}"
249
+
250
+ elif strategy == PromptConstructionStrategy.INSTRUCTION_FOLLOWING:
251
+ # [INST] Q [/INST] → good_resp/bad_resp (instruction format)
252
+ prompt_text = f"[INST] {prompt} [/INST]"
253
+ full_text = f"[INST] {prompt} [/INST] {response}"
254
+
255
+ elif strategy == PromptConstructionStrategy.MULTIPLE_CHOICE:
256
+ # Which is better: Q A. bad B. good → "A"/"B" (choice format)
257
+ # For multiple choice, we expect response to be "A" or "B"
258
+ prompt_text = f"Which is better: {prompt} A. [bad response] B. [good response]\nAnswer:"
259
+ full_text = f"{prompt_text} {response}"
260
+
261
+ elif strategy == PromptConstructionStrategy.ROLE_PLAYING:
262
+ # Behave like person who would answer Q with good_resp → "I" (role assumption)
263
+ prompt_text = f"Behave like a person who would answer '{prompt}' with '{response}'. Say 'I' to confirm:"
264
+ full_text = f"{prompt_text} I"
265
+
266
+ else:
267
+ raise ValueError(f"Unknown prompt construction strategy: {strategy}")
268
+
269
+ return prompt_text, full_text
270
+
220
271
  def _select_indices(self, layer_names: Sequence[str] | None, n_blocks: int) -> list[int]:
221
272
  """Map layer names '1'..'L' -> indices 0..L-1."""
222
273
  if not layer_names:
@@ -0,0 +1,47 @@
1
+ """Prompt construction strategies for activation collection."""
2
+
3
+ from enum import Enum
4
+
5
+
6
+ class PromptConstructionStrategy(Enum):
7
+ """
8
+ Strategies for constructing prompts from question-answer pairs.
9
+
10
+ These strategies determine how the prompt and response are formatted
11
+ before being passed to the model for activation extraction.
12
+ """
13
+
14
+ MULTIPLE_CHOICE = "multiple_choice"
15
+ """
16
+ Format: Which is better: Q A. bad B. good → "A"/"B" (choice format)
17
+ Example: "Which is better: What is 2+2? A. 5 B. 4"
18
+ Response: "A" or "B"
19
+ """
20
+
21
+ ROLE_PLAYING = "role_playing"
22
+ """
23
+ Format: Behave like person who would answer Q with good_resp → "I" (role assumption)
24
+ Example: "Behave like a person who would answer 'What is 2+2?' with '4'"
25
+ Response: "I"
26
+ """
27
+
28
+ DIRECT_COMPLETION = "direct_completion"
29
+ """
30
+ Format: Q → good_resp/bad_resp (direct answer)
31
+ Example: "What is 2+2?"
32
+ Response: "4" or "5"
33
+ """
34
+
35
+ INSTRUCTION_FOLLOWING = "instruction_following"
36
+ """
37
+ Format: [INST] Q [/INST] → good_resp/bad_resp (instruction format)
38
+ Example: "[INST] What is 2+2? [/INST]"
39
+ Response: "4" or "5"
40
+ """
41
+
42
+ CHAT_TEMPLATE = "chat_template"
43
+ """
44
+ Format: Uses the model's built-in chat template
45
+ Example: <|start_header_id|>user<|end_header_id|>What is 2+2?<|eot_id|>
46
+ Response: Model's chat-formatted response
47
+ """
@@ -1,18 +1 @@
1
- """
2
- Agent module for wisent-guard autonomous systems.
3
-
4
- This module provides:
5
- - ResponseDiagnostics: Response analysis and quality assessment
6
- - ResponseSteering: Response improvement and steering
7
- - Data classes for analysis and improvement results
8
- """
9
-
10
- from .diagnose import ResponseDiagnostics, AnalysisResult
11
- from .steer import ResponseSteering, ImprovementResult
12
-
13
- __all__ = [
14
- 'ResponseDiagnostics',
15
- 'AnalysisResult',
16
- 'ResponseSteering',
17
- 'ImprovementResult'
18
- ]
1
+ # Empty __init__.py to avoid cascading import errors with empty __init__ pattern
@@ -276,7 +276,7 @@ class BudgetManager:
276
276
  return estimate_task_time("benchmark_eval", 100)
277
277
 
278
278
  except Exception as e:
279
- raise RuntimeError(f"Device benchmark estimate failed for task '{task_name}': {e}. Run device benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
279
+ raise RuntimeError(f"Device benchmark estimate failed for task '{task_name}': {e}. Run device benchmark first with: python -m wisent.core.agent.budget benchmark")
280
280
 
281
281
  elif resource_type == ResourceType.MEMORY:
282
282
  raise RuntimeError(f"Memory estimation not implemented for task '{task_name}'")
@@ -348,7 +348,7 @@ def calculate_max_tasks_for_time_budget(task_type: str = "benchmark_evaluation",
348
348
  return max_tasks
349
349
 
350
350
  except Exception as e:
351
- raise RuntimeError(f"Budget calculation failed for task '{task_type}': {e}. Run device benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
351
+ raise RuntimeError(f"Budget calculation failed for task '{task_type}': {e}. Run device benchmark first with: python -m wisent.core.agent.budget benchmark")
352
352
 
353
353
 
354
354
  def optimize_tasks_for_budget(task_candidates: List[str],
@@ -629,7 +629,7 @@ except Exception as e:
629
629
  """
630
630
  benchmark = self.get_current_benchmark()
631
631
  if not benchmark:
632
- raise RuntimeError(f"No benchmark available for device. Run benchmark first with: python -m wisent_guard.core.agent.budget benchmark")
632
+ raise RuntimeError(f"No benchmark available for device. Run benchmark first with: python -m wisent.core.agent.budget benchmark")
633
633
  else:
634
634
  # Use actual benchmark results
635
635
  if task_type == "model_loading":
@@ -1,55 +1 @@
1
- """
2
- Diagnostic module for autonomous agent.
3
-
4
- This module provides:
5
- - Classifier selection and auto-discovery
6
- - On-the-fly classifier creation
7
- - Response analysis and quality assessment
8
- """
9
-
10
- # Response diagnostics
11
- from .response_diagnostics import ResponseDiagnostics, AnalysisResult
12
-
13
- # Classifier management
14
- from .select_classifiers import ClassifierSelector, ClassifierInfo, SelectionCriteria, auto_select_classifiers_for_agent
15
- from .create_classifier import ClassifierCreator, TrainingConfig, TrainingResult, create_classifier_on_demand
16
-
17
- # New marketplace system
18
- from .classifier_marketplace import (
19
- ClassifierMarketplace,
20
- ClassifierListing,
21
- ClassifierCreationEstimate
22
- )
23
-
24
- # Agent decision system
25
- from .agent_classifier_decision import (
26
- AgentClassifierDecisionSystem,
27
- TaskAnalysis,
28
- ClassifierDecision
29
- )
30
-
31
- __all__ = [
32
- # Response diagnostics
33
- 'ResponseDiagnostics',
34
- 'AnalysisResult',
35
-
36
- # Legacy classifier management (for backward compatibility)
37
- 'ClassifierSelector',
38
- 'ClassifierInfo',
39
- 'SelectionCriteria',
40
- 'auto_select_classifiers_for_agent',
41
- 'ClassifierCreator',
42
- 'TrainingConfig',
43
- 'TrainingResult',
44
- 'create_classifier_on_demand',
45
-
46
- # New marketplace system
47
- 'ClassifierMarketplace',
48
- 'ClassifierListing',
49
- 'ClassifierCreationEstimate',
50
-
51
- # Agent decision system
52
- 'AgentClassifierDecisionSystem',
53
- 'TaskAnalysis',
54
- 'ClassifierDecision'
55
- ]
1
+ # Empty __init__.py to avoid cascading import errors with empty __init__ pattern
@@ -53,9 +53,9 @@ class ClassifierMarketplace:
53
53
  self.search_paths = search_paths or [
54
54
  "./models/",
55
55
  "./classifiers/",
56
- "./wisent_guard/models/",
57
- "./wisent_guard/classifiers/",
58
- "./wisent_guard/core/classifiers/"
56
+ "./wisent/models/",
57
+ "./wisent/classifiers/",
58
+ "./wisent/core/classifiers/"
59
59
  ]
60
60
  self.available_classifiers: List[ClassifierListing] = []
61
61
  self._training_time_cache = {}
@@ -75,8 +75,8 @@ class ClassifierMarketplace:
75
75
  if not os.path.exists(search_path):
76
76
  continue
77
77
 
78
- # For wisent_guard/core/classifiers, search recursively for the nested structure
79
- if "wisent_guard/core/classifiers" in search_path:
78
+ # For wisent/core/classifiers, search recursively for the nested structure
79
+ if "wisent/core/classifiers" in search_path:
80
80
  import glob
81
81
  pattern = os.path.join(search_path, "**", "*.pkl")
82
82
  classifier_files = glob.glob(pattern, recursive=True)
@@ -163,9 +163,9 @@ class ClassifierMarketplace:
163
163
  """Parse layer and issue type from filename."""
164
164
  filename = os.path.basename(filepath).lower()
165
165
 
166
- # Check if this is from wisent_guard/core/classifiers with nested structure
167
- if "wisent_guard/core/classifiers" in filepath:
168
- # Extract from path structure: wisent_guard/core/classifiers/{model}/{benchmark}/layer_{layer}.pkl
166
+ # Check if this is from wisent/core/classifiers with nested structure
167
+ if "wisent/core/classifiers" in filepath:
168
+ # Extract from path structure: wisent/core/classifiers/{model}/{benchmark}/layer_{layer}.pkl
169
169
  path_parts = filepath.split(os.sep)
170
170
 
171
171
  # Find the benchmark name (second to last directory)
@@ -11,11 +11,11 @@ This module handles:
11
11
  from dataclasses import dataclass
12
12
  from typing import Any, Dict, List
13
13
 
14
- from wisent.core.activations import ActivationAggregationStrategy, Activations
14
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
15
+ from wisent.core.activations.activations import Activations
15
16
  from wisent.core.classifier.classifier import Classifier
16
-
17
- from ...layer import Layer
18
- from ...model import Model
17
+ from wisent.core.layer import Layer
18
+ from wisent.core.model import Model
19
19
 
20
20
 
21
21
  @dataclass
@@ -193,7 +193,7 @@ class SyntheticClassifierFactory:
193
193
  logging.info("Starting classifier training...")
194
194
  try:
195
195
  # Convert activations to the format expected by train_on_activations method
196
- from wisent.core.activations import Activations
196
+ from wisent.core.activations.activations import Activations
197
197
 
198
198
  # Convert torch tensors to Activations objects if needed
199
199
  harmful_activations = []
@@ -331,7 +331,7 @@ def handle_configurable_group_task(task_name: str):
331
331
  # Look for existing YAML files in common directories
332
332
  yaml_candidates = []
333
333
  search_dirs = [
334
- "wisent_guard/parameters/tasks",
334
+ "wisent/parameters/tasks",
335
335
  ".",
336
336
  "tasks",
337
337
  "configs"
@@ -891,7 +891,7 @@ def save_custom_task_yaml(task_name: str, yaml_content: str) -> Optional[str]:
891
891
  """
892
892
  try:
893
893
  # Create the tasks directory if it doesn't exist
894
- tasks_dir = os.path.join("wisent_guard", "parameters", "tasks")
894
+ tasks_dir = os.path.join("wisent", "parameters", "tasks")
895
895
  os.makedirs(tasks_dir, exist_ok=True)
896
896
 
897
897
  # Save the YAML content to a file
@@ -993,7 +993,7 @@ def create_flan_held_in_files() -> Optional[str]:
993
993
  """
994
994
  try:
995
995
  # Create the tasks directory
996
- tasks_dir = os.path.join("wisent_guard", "parameters", "tasks")
996
+ tasks_dir = os.path.join("wisent", "parameters", "tasks")
997
997
  os.makedirs(tasks_dir, exist_ok=True)
998
998
 
999
999
  # Create the template file first
@@ -11,7 +11,8 @@ This module handles:
11
11
  from dataclasses import dataclass
12
12
  from typing import Any, Dict, List
13
13
 
14
- from wisent.core.activations import ActivationAggregationStrategy, Activations
14
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
15
+ from wisent.core.activations.activations import Activations
15
16
  from wisent.core.classifier.classifier import Classifier
16
17
 
17
18
  from ..layer import Layer
@@ -12,7 +12,8 @@ A model that can autonomously use wisent-guard capabilities on itself:
12
12
  import asyncio
13
13
  from typing import Any, Dict, List, Optional
14
14
 
15
- from wisent.core.activations import ActivationAggregationStrategy, Activations
15
+ from wisent.core.activations.core.atoms import ActivationAggregationStrategy
16
+ from wisent.core.activations.activations import Activations
16
17
 
17
18
  from .agent.diagnose import AgentClassifierDecisionSystem, AnalysisResult, ClassifierMarketplace, ResponseDiagnostics
18
19
  from .agent.steer import ImprovementResult, ResponseSteering
@@ -768,9 +769,16 @@ class AutonomousAgent:
768
769
  if not classifier_config:
769
770
  return None
770
771
 
772
+ # Validate required classifier configuration
773
+ if "layer" not in classifier_config:
774
+ raise ValueError(
775
+ "Classifier configuration must include 'layer' parameter. "
776
+ "Please ensure your classifier configuration file specifies the optimal layer."
777
+ )
778
+
771
779
  # Create ClassifierParams from stored data
772
780
  params = ClassifierParams(
773
- optimal_layer=classifier_config.get("layer", 15),
781
+ optimal_layer=classifier_config["layer"],
774
782
  classification_threshold=classifier_config.get("threshold", 0.5),
775
783
  training_samples=classifier_config.get("samples", 25),
776
784
  classifier_type=classifier_config.get("type", "logistic"),